From 6bc0ce8046a8a2fc3c8842f398118c417c513ad8 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sat, 25 Oct 2014 06:38:47 +0530 Subject: [PATCH 01/27] Initial commit for advanced tracer --- rpcs3/Emu/Cell/PPUInterpreter.h | 6 +- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 1638 +++++++++++++------------- rpcs3/Emu/Cell/PPULLVMRecompiler.h | 1559 ++++++++++++------------ rpcs3/emucore.vcxproj.filters | 2 +- 4 files changed, 1596 insertions(+), 1609 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index cdb7a2d2ce..da61d3f1b7 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -55,9 +55,13 @@ u64 rotr64(const u64 x, const u8 n) { return (x >> n) | (x << (64 - n)); } #define rotl64 _rotl64 #define rotr64 _rotr64 +namespace ppu_recompiler_llvm { + class Compiler; +} + class PPUInterpreter : public PPUOpcodes { - friend class PPULLVMRecompiler; + friend class ppu_recompiler_llvm::Compiler; private: PPUThread& CPU; diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 5d66f575a9..4dda28cb55 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -21,13 +21,12 @@ #include "llvm/MC/MCDisassembler.h" using namespace llvm; +using namespace ppu_recompiler_llvm; -u64 PPULLVMRecompiler::s_rotate_mask[64][64]; -bool PPULLVMRecompiler::s_rotate_mask_inited = false; +u64 Compiler::s_rotate_mask[64][64]; +bool Compiler::s_rotate_mask_inited = false; -PPULLVMRecompiler::PPULLVMRecompiler() - : ThreadBase("PPULLVMRecompiler") - , m_revision(0) { +Compiler::Compiler() { InitializeNativeTarget(); InitializeNativeTargetAsmPrinter(); InitializeNativeTargetDisassembler(); @@ -70,181 +69,164 @@ PPULLVMRecompiler::PPULLVMRecompiler() } } -PPULLVMRecompiler::~PPULLVMRecompiler() { - Stop(); - +Compiler::~Compiler() { delete m_execution_engine; delete m_fpm; delete m_ir_builder; delete m_llvm_context; } -std::pair 
PPULLVMRecompiler::GetExecutable(u32 address) { - std::lock_guard lock(m_compiled_shared_lock); +CompiledCodeFragment Compiler::Compile(const std::string & name, const CodeFragment & code_fragment) { + assert(!name.empty()); + assert(!code_fragment.empty()); - auto compiled = m_compiled_shared.lower_bound(std::make_pair(address, 0)); - if (compiled != m_compiled_shared.end() && compiled->first.first == address) { - compiled->second.second++; - return std::make_pair(compiled->second.first, compiled->first.second); - } + auto compilation_start = std::chrono::high_resolution_clock::now(); - return std::make_pair(nullptr, 0); -} + // Create the function + m_current_function = (Function *)m_module->getOrInsertFunction(name, m_ir_builder->getVoidTy(), + m_ir_builder->getInt8PtrTy() /*ppu_state*/, + m_ir_builder->getInt8PtrTy() /*interpreter*/, + m_ir_builder->getInt8PtrTy() /*tracer*/, nullptr); + m_current_function->setCallingConv(CallingConv::X86_64_Win64); + auto arg_i = m_current_function->arg_begin(); + arg_i->setName("ppu_state"); + (++arg_i)->setName("interpreter"); + (++arg_i)->setName("tracer"); -void PPULLVMRecompiler::ReleaseExecutable(u32 address, u32 revision) { - std::lock_guard lock(m_compiled_shared_lock); + // Create the entry block + GetBasicBlockFromAddress(0, m_current_function, true); - auto compiled = m_compiled_shared.find(std::make_pair(address, revision)); - if (compiled != m_compiled_shared.end()) { - compiled->second.second--; - } -} + // Create basic blocks for each instruction + for (auto i = code_fragment.begin(); i != code_fragment.end(); i++) { + u32 address = i->first.address; + while (1) { + GetBasicBlockFromAddress(address, m_current_function, true); -void PPULLVMRecompiler::RequestCompilation(u32 address) { - { - std::lock_guard lock(m_uncompiled_shared_lock); - m_uncompiled_shared.push_back(address); - } - - if (!IsAlive()) { - Start(); - } - - Notify(); -} - -u32 PPULLVMRecompiler::GetCurrentRevision() { - return 
m_revision.load(std::memory_order_relaxed); -} - -void PPULLVMRecompiler::Task() { - auto start = std::chrono::high_resolution_clock::now(); - - while (!TestDestroy() && !Emu.IsStopped()) { - // Wait a few ms for something to happen - auto idling_start = std::chrono::high_resolution_clock::now(); - WaitForAnySignal(250); - auto idling_end = std::chrono::high_resolution_clock::now(); - m_idling_time += std::chrono::duration_cast(idling_end - idling_start); - - // Update the set of blocks that have been hit with the set of blocks that have been requested for compilation. - { - std::lock_guard lock(m_uncompiled_shared_lock); - for (auto i = m_uncompiled_shared.begin(); i != m_uncompiled_shared.end(); i++) { - m_hit_blocks.insert(*i); + u32 instr = vm::read32(address); + if (IsBranchInstruction(instr)) { + break; } + + address += 4; } + } - u32 num_compiled = 0; - while (!TestDestroy() && !Emu.IsStopped()) { - u32 address; + // Add code to notify the tracer about this function and branch to the first instruction + m_ir_builder->SetInsertPoint(GetBasicBlockFromAddress(0, m_current_function)); + //Call("Tracer.Trace", &Tracer::Trace, *arg_i, + // m_ir_builder->getIntN(sizeof(Tracer::BranchType) * 8, code_fragment[0].first.type == FunctionStart ? 
Tracer::BranchType::CompiledFunctionCall : Tracer::BranchType::CompiledBlock), + // m_ir_builder->getInt32(code_fragment[0].first.address)); + m_ir_builder->CreateBr(GetBasicBlockFromAddress(code_fragment[0].first.address, m_current_function)); - { - std::lock_guard lock(m_uncompiled_shared_lock); + // Convert each block in this code fragment to LLVM IR + for (auto i = code_fragment.begin(); i != code_fragment.end(); i++) { + m_current_instruction_address = i->first.address; + m_current_block_next_blocks = &(i->second); + auto block = GetBasicBlockFromAddress(m_current_instruction_address, m_current_function); + m_hit_branch_instruction = false; + m_ir_builder->SetInsertPoint(block); - auto i = m_uncompiled_shared.begin(); - if (i != m_uncompiled_shared.end()) { - address = *i; - m_uncompiled_shared.erase(i); - } else { - break; - } + while (!m_hit_branch_instruction) { + if (!block->getInstList().empty()) { + break; } - m_hit_blocks.insert(address); - if (NeedsCompiling(address)) { - Compile(address); - num_compiled++; - } - } + u32 instr = vm::read32(m_current_instruction_address); + Decode(instr); - if (num_compiled == 0) { - // If we get here, it means the recompilation thread is idling. - // We use this oppurtunity to optimize the code. 
- RemoveUnusedOldVersions(); - for (auto i = m_compiled.begin(); i != m_compiled.end(); i++) { - if (NeedsCompiling(i->first.first)) { - Compile(i->first.first); - num_compiled++; - } + m_current_instruction_address += 4; + if (!m_hit_branch_instruction) { + block = GetBasicBlockFromAddress(m_current_instruction_address, m_current_function); + m_ir_builder->CreateBr(block); + m_ir_builder->SetInsertPoint(block); } } } - std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now(); - m_total_time = std::chrono::duration_cast(end - start); + // If the function has an unknown block (a block exists for address 0xFFFFFFFF) then notify the tracer + auto unknown_bb = GetBasicBlockFromAddress(0xFFFFFFFF, m_current_function); + if (unknown_bb) { + m_ir_builder->SetInsertPoint(unknown_bb); + auto branch_type_i32 = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 1); + for (auto i = pred_begin(unknown_bb); i != pred_end(unknown_bb); i++) { + // We assume that the last but one instruction of the predecessor sets the branch type; rbegin() is the last instruction, so advance the reverse iterator once + auto j = (*i)->rbegin(); + j++; + branch_type_i32->addIncoming(&(*j), *i); + } - std::string error; - raw_fd_ostream log_file("PPULLVMRecompiler.log", error, sys::fs::F_Text); - log_file << "Total time = " << m_total_time.count() / 1000000 << "ms\n"; - log_file << " Time spent compiling = " << m_compilation_time.count() / 1000000 << "ms\n"; - log_file << " Time spent building IR = " << m_ir_build_time.count() / 1000000 << "ms\n"; - log_file << " Time spent optimizing = " << m_optimizing_time.count() / 1000000 << "ms\n"; - log_file << " Time spent translating = " << m_translation_time.count() / 1000000 << "ms\n"; - log_file << " Time spent idling = " << m_idling_time.count() / 1000000 << "ms\n"; - log_file << " Time spent doing misc tasks = " << (m_total_time.count() - m_idling_time.count() - m_compilation_time.count()) / 1000000 << "ms\n"; - log_file << "Revision = " << m_revision << "\n"; - log_file << "\nInterpreter fallback stats:\n"; - for (auto i = 
m_interpreter_fallback_stats.begin(); i != m_interpreter_fallback_stats.end(); i++) { - log_file << i->first << " = " << i->second << "\n"; + //Call("NotifyBranch", &Tracer::NotifyBranch, *arg_i, + // m_ir_builder->CreateZExtOrTrunc(branch_type_i32, m_ir_builder->getIntNTy(sizeof(Tracer::BranchType) * 8)), GetPc()); + m_ir_builder->CreateRetVoid(); } - log_file << "\nDisassembly:\n"; - //auto disassembler = LLVMCreateDisasm(sys::getProcessTriple().c_str(), nullptr, 0, nullptr, nullptr); - for (auto i = m_compiled.begin(); i != m_compiled.end(); i++) { - log_file << fmt::Format("%s: Size = %u bytes, Number of instructions = %u\n", i->second.llvm_function->getName().str().c_str(), i->second.size, i->second.num_instructions); + auto ir_build_end = std::chrono::high_resolution_clock::now(); + m_stats.ir_build_time += std::chrono::duration_cast(ir_build_end - compilation_start); - //uint8_t * fn_ptr = (uint8_t *)i->second.executable; - //for (size_t pc = 0; pc < i->second.size;) { - // char str[1024]; + // Optimize this function + m_fpm->run(*m_current_function); + auto optimize_end = std::chrono::high_resolution_clock::now(); + m_stats.optimization_time += std::chrono::duration_cast(optimize_end - ir_build_end); - // auto size = LLVMDisasmInstruction(disassembler, fn_ptr + pc, i->second.size - pc, (uint64_t)(fn_ptr + pc), str, sizeof(str)); - // log_file << str << '\n'; - // pc += size; - //} - } + // Translate to machine code + MachineCodeInfo mci; + m_execution_engine->runJITOnFunction(m_current_function, &mci); + auto translate_end = std::chrono::high_resolution_clock::now(); + m_stats.translation_time += std::chrono::duration_cast(translate_end - optimize_end); - //LLVMDisasmDispose(disassembler); + auto compilation_end = std::chrono::high_resolution_clock::now(); + m_stats.total_time += std::chrono::duration_cast(compilation_end - compilation_start); - //log_file << "\nLLVM IR:\n" << *m_module; - - LOG_NOTICE(PPU, "PPU LLVM compiler thread exiting."); + 
m_compiled[(CompiledCodeFragment)mci.address()] = m_current_function; + return (CompiledCodeFragment)mci.address(); } -void PPULLVMRecompiler::Decode(const u32 code) { +void Compiler::FreeCompiledCodeFragment(CompiledCodeFragment compiled_code_fragment) { + auto i = m_compiled.find(compiled_code_fragment); + if (i != m_compiled.end()) { + m_execution_engine->freeMachineCodeForFunction(i->second); + i->second->eraseFromParent(); + } +} + +Compiler::Stats Compiler::GetStats() { + return m_stats; +} + +void Compiler::Decode(const u32 code) { (*PPU_instr::main_list)(this, code); } -void PPULLVMRecompiler::NULL_OP() { +void Compiler::NULL_OP() { InterpreterCall("NULL_OP", &PPUInterpreter::NULL_OP); } -void PPULLVMRecompiler::NOP() { +void Compiler::NOP() { InterpreterCall("NOP", &PPUInterpreter::NOP); } -void PPULLVMRecompiler::TDI(u32 to, u32 ra, s32 simm16) { +void Compiler::TDI(u32 to, u32 ra, s32 simm16) { InterpreterCall("TDI", &PPUInterpreter::TDI, to, ra, simm16); } -void PPULLVMRecompiler::TWI(u32 to, u32 ra, s32 simm16) { +void Compiler::TWI(u32 to, u32 ra, s32 simm16) { InterpreterCall("TWI", &PPUInterpreter::TWI, to, ra, simm16); } -void PPULLVMRecompiler::MFVSCR(u32 vd) { +void Compiler::MFVSCR(u32 vd) { auto vscr_i32 = GetVscr(); auto vscr_i128 = m_ir_builder->CreateZExt(vscr_i32, m_ir_builder->getIntNTy(128)); SetVr(vd, vscr_i128); } -void PPULLVMRecompiler::MTVSCR(u32 vb) { +void Compiler::MTVSCR(u32 vb) { auto vb_v4i32 = GetVrAsIntVec(vb, 32); auto vscr_i32 = m_ir_builder->CreateExtractElement(vb_v4i32, m_ir_builder->getInt32(0)); vscr_i32 = m_ir_builder->CreateAnd(vscr_i32, 0x00010001); SetVscr(vscr_i32); } -void PPULLVMRecompiler::VADDCUW(u32 vd, u32 va, u32 vb) { +void Compiler::VADDCUW(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); @@ -254,14 +236,14 @@ void PPULLVMRecompiler::VADDCUW(u32 vd, u32 va, u32 vb) { SetVr(vd, cmpv4i32); } -void PPULLVMRecompiler::VADDFP(u32 vd, u32 va, u32 vb) { 
+void Compiler::VADDFP(u32 vd, u32 va, u32 vb) { auto va_v4f32 = GetVrAsFloatVec(va); auto vb_v4f32 = GetVrAsFloatVec(vb); auto sum_v4f32 = m_ir_builder->CreateFAdd(va_v4f32, vb_v4f32); SetVr(vd, sum_v4f32); } -void PPULLVMRecompiler::VADDSBS(u32 vd, u32 va, u32 vb) { +void Compiler::VADDSBS(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto sum_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_padds_b), va_v16i8, vb_v16i8); @@ -270,7 +252,7 @@ void PPULLVMRecompiler::VADDSBS(u32 vd, u32 va, u32 vb) { // TODO: Set VSCR.SAT } -void PPULLVMRecompiler::VADDSHS(u32 vd, u32 va, u32 vb) { +void Compiler::VADDSHS(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto sum_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_padds_w), va_v8i16, vb_v8i16); @@ -279,7 +261,7 @@ void PPULLVMRecompiler::VADDSHS(u32 vd, u32 va, u32 vb) { // TODO: Set VSCR.SAT } -void PPULLVMRecompiler::VADDSWS(u32 vd, u32 va, u32 vb) { +void Compiler::VADDSWS(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); @@ -320,14 +302,14 @@ void PPULLVMRecompiler::VADDSWS(u32 vd, u32 va, u32 vb) { // TODO: Set SAT } -void PPULLVMRecompiler::VADDUBM(u32 vd, u32 va, u32 vb) { +void Compiler::VADDUBM(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto sum_v16i8 = m_ir_builder->CreateAdd(va_v16i8, vb_v16i8); SetVr(vd, sum_v16i8); } -void PPULLVMRecompiler::VADDUBS(u32 vd, u32 va, u32 vb) { +void Compiler::VADDUBS(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto sum_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_paddus_b), va_v16i8, vb_v16i8); @@ -336,14 +318,14 @@ void PPULLVMRecompiler::VADDUBS(u32 vd, u32 va, u32 vb) { 
// TODO: Set SAT } -void PPULLVMRecompiler::VADDUHM(u32 vd, u32 va, u32 vb) { +void Compiler::VADDUHM(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto sum_v8i16 = m_ir_builder->CreateAdd(va_v8i16, vb_v8i16); SetVr(vd, sum_v8i16); } -void PPULLVMRecompiler::VADDUHS(u32 vd, u32 va, u32 vb) { +void Compiler::VADDUHS(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto sum_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_paddus_w), va_v8i16, vb_v8i16); @@ -352,14 +334,14 @@ void PPULLVMRecompiler::VADDUHS(u32 vd, u32 va, u32 vb) { // TODO: Set SAT } -void PPULLVMRecompiler::VADDUWM(u32 vd, u32 va, u32 vb) { +void Compiler::VADDUWM(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); auto sum_v4i32 = m_ir_builder->CreateAdd(va_v4i32, vb_v4i32); SetVr(vd, sum_v4i32); } -void PPULLVMRecompiler::VADDUWS(u32 vd, u32 va, u32 vb) { +void Compiler::VADDUWS(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); auto sum_v4i32 = m_ir_builder->CreateAdd(va_v4i32, vb_v4i32); @@ -371,14 +353,14 @@ void PPULLVMRecompiler::VADDUWS(u32 vd, u32 va, u32 vb) { // TODO: Set SAT } -void PPULLVMRecompiler::VAND(u32 vd, u32 va, u32 vb) { +void Compiler::VAND(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); auto res_v4i32 = m_ir_builder->CreateAnd(va_v4i32, vb_v4i32); SetVr(vd, res_v4i32); } -void PPULLVMRecompiler::VANDC(u32 vd, u32 va, u32 vb) { +void Compiler::VANDC(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); vb_v4i32 = m_ir_builder->CreateNot(vb_v4i32); @@ -386,7 +368,7 @@ void PPULLVMRecompiler::VANDC(u32 vd, u32 va, u32 vb) { SetVr(vd, res_v4i32); } -void PPULLVMRecompiler::VAVGSB(u32 vd, u32 va, u32 vb) { +void 
Compiler::VAVGSB(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto va_v16i16 = m_ir_builder->CreateSExt(va_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); @@ -398,7 +380,7 @@ void PPULLVMRecompiler::VAVGSB(u32 vd, u32 va, u32 vb) { SetVr(vd, avg_v16i8); } -void PPULLVMRecompiler::VAVGSH(u32 vd, u32 va, u32 vb) { +void Compiler::VAVGSH(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto va_v8i32 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); @@ -410,7 +392,7 @@ void PPULLVMRecompiler::VAVGSH(u32 vd, u32 va, u32 vb) { SetVr(vd, avg_v8i16); } -void PPULLVMRecompiler::VAVGSW(u32 vd, u32 va, u32 vb) { +void Compiler::VAVGSW(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); auto va_v4i64 = m_ir_builder->CreateSExt(va_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); @@ -422,21 +404,21 @@ void PPULLVMRecompiler::VAVGSW(u32 vd, u32 va, u32 vb) { SetVr(vd, avg_v4i32); } -void PPULLVMRecompiler::VAVGUB(u32 vd, u32 va, u32 vb) { +void Compiler::VAVGUB(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto avg_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pavg_b), va_v16i8, vb_v16i8); SetVr(vd, avg_v16i8); } -void PPULLVMRecompiler::VAVGUH(u32 vd, u32 va, u32 vb) { +void Compiler::VAVGUH(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto avg_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pavg_w), va_v8i16, vb_v8i16); SetVr(vd, avg_v8i16); } -void PPULLVMRecompiler::VAVGUW(u32 vd, u32 va, u32 vb) { +void Compiler::VAVGUW(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); auto va_v4i64 = 
m_ir_builder->CreateZExt(va_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); @@ -448,7 +430,7 @@ void PPULLVMRecompiler::VAVGUW(u32 vd, u32 va, u32 vb) { SetVr(vd, avg_v4i32); } -void PPULLVMRecompiler::VCFSX(u32 vd, u32 uimm5, u32 vb) { +void Compiler::VCFSX(u32 vd, u32 uimm5, u32 vb) { auto vb_v4i32 = GetVrAsIntVec(vb, 32); auto res_v4f32 = m_ir_builder->CreateSIToFP(vb_v4i32, VectorType::get(m_ir_builder->getFloatTy(), 4)); @@ -460,7 +442,7 @@ void PPULLVMRecompiler::VCFSX(u32 vd, u32 uimm5, u32 vb) { SetVr(vd, res_v4f32); } -void PPULLVMRecompiler::VCFUX(u32 vd, u32 uimm5, u32 vb) { +void Compiler::VCFUX(u32 vd, u32 uimm5, u32 vb) { auto vb_v4i32 = GetVrAsIntVec(vb, 32); auto res_v4f32 = m_ir_builder->CreateUIToFP(vb_v4i32, VectorType::get(m_ir_builder->getFloatTy(), 4)); @@ -472,7 +454,7 @@ void PPULLVMRecompiler::VCFUX(u32 vd, u32 uimm5, u32 vb) { SetVr(vd, res_v4f32); } -void PPULLVMRecompiler::VCMPBFP(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPBFP(u32 vd, u32 va, u32 vb) { auto va_v4f32 = GetVrAsFloatVec(va); auto vb_v4f32 = GetVrAsFloatVec(vb); auto cmp_gt_v4i1 = m_ir_builder->CreateFCmpOGT(va_v4f32, vb_v4f32); @@ -488,7 +470,7 @@ void PPULLVMRecompiler::VCMPBFP(u32 vd, u32 va, u32 vb) { // TODO: Implement NJ mode } -void PPULLVMRecompiler::VCMPBFP_(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPBFP_(u32 vd, u32 va, u32 vb) { VCMPBFP(vd, va, vb); auto vd_v16i8 = GetVrAsIntVec(vd, 8); @@ -500,7 +482,7 @@ void PPULLVMRecompiler::VCMPBFP_(u32 vd, u32 va, u32 vb) { SetCrField(6, nullptr, nullptr, cmp_i1, nullptr); } -void PPULLVMRecompiler::VCMPEQFP(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPEQFP(u32 vd, u32 va, u32 vb) { auto va_v4f32 = GetVrAsFloatVec(va); auto vb_v4f32 = GetVrAsFloatVec(vb); auto cmp_v4i1 = m_ir_builder->CreateFCmpOEQ(va_v4f32, vb_v4f32); @@ -508,12 +490,12 @@ void PPULLVMRecompiler::VCMPEQFP(u32 vd, u32 va, u32 vb) { SetVr(vd, cmp_v4i32); } -void PPULLVMRecompiler::VCMPEQFP_(u32 vd, u32 va, u32 vb) { +void 
Compiler::VCMPEQFP_(u32 vd, u32 va, u32 vb) { VCMPEQFP(vd, va, vb); SetCr6AfterVectorCompare(vd); } -void PPULLVMRecompiler::VCMPEQUB(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPEQUB(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto cmp_v16i1 = m_ir_builder->CreateICmpEQ(va_v16i8, vb_v16i8); @@ -521,12 +503,12 @@ void PPULLVMRecompiler::VCMPEQUB(u32 vd, u32 va, u32 vb) { SetVr(vd, cmp_v16i8); } -void PPULLVMRecompiler::VCMPEQUB_(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPEQUB_(u32 vd, u32 va, u32 vb) { VCMPEQUB(vd, va, vb); SetCr6AfterVectorCompare(vd); } -void PPULLVMRecompiler::VCMPEQUH(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPEQUH(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto cmp_v8i1 = m_ir_builder->CreateICmpEQ(va_v8i16, vb_v8i16); @@ -534,12 +516,12 @@ void PPULLVMRecompiler::VCMPEQUH(u32 vd, u32 va, u32 vb) { SetVr(vd, cmp_v8i16); } -void PPULLVMRecompiler::VCMPEQUH_(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPEQUH_(u32 vd, u32 va, u32 vb) { VCMPEQUH(vd, va, vb); SetCr6AfterVectorCompare(vd); } -void PPULLVMRecompiler::VCMPEQUW(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPEQUW(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); auto cmp_v4i1 = m_ir_builder->CreateICmpEQ(va_v4i32, vb_v4i32); @@ -547,12 +529,12 @@ void PPULLVMRecompiler::VCMPEQUW(u32 vd, u32 va, u32 vb) { SetVr(vd, cmp_v4i32); } -void PPULLVMRecompiler::VCMPEQUW_(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPEQUW_(u32 vd, u32 va, u32 vb) { VCMPEQUW(vd, va, vb); SetCr6AfterVectorCompare(vd); } -void PPULLVMRecompiler::VCMPGEFP(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPGEFP(u32 vd, u32 va, u32 vb) { auto va_v4f32 = GetVrAsFloatVec(va); auto vb_v4f32 = GetVrAsFloatVec(vb); auto cmp_v4i1 = m_ir_builder->CreateFCmpOGE(va_v4f32, vb_v4f32); @@ -560,12 +542,12 @@ void PPULLVMRecompiler::VCMPGEFP(u32 vd, u32 va, u32 
vb) { SetVr(vd, cmp_v4i32); } -void PPULLVMRecompiler::VCMPGEFP_(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPGEFP_(u32 vd, u32 va, u32 vb) { VCMPGEFP(vd, va, vb); SetCr6AfterVectorCompare(vd); } -void PPULLVMRecompiler::VCMPGTFP(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPGTFP(u32 vd, u32 va, u32 vb) { auto va_v4f32 = GetVrAsFloatVec(va); auto vb_v4f32 = GetVrAsFloatVec(vb); auto cmp_v4i1 = m_ir_builder->CreateFCmpOGT(va_v4f32, vb_v4f32); @@ -573,12 +555,12 @@ void PPULLVMRecompiler::VCMPGTFP(u32 vd, u32 va, u32 vb) { SetVr(vd, cmp_v4i32); } -void PPULLVMRecompiler::VCMPGTFP_(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPGTFP_(u32 vd, u32 va, u32 vb) { VCMPGTFP(vd, va, vb); SetCr6AfterVectorCompare(vd); } -void PPULLVMRecompiler::VCMPGTSB(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPGTSB(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto cmp_v16i1 = m_ir_builder->CreateICmpSGT(va_v16i8, vb_v16i8); @@ -586,12 +568,12 @@ void PPULLVMRecompiler::VCMPGTSB(u32 vd, u32 va, u32 vb) { SetVr(vd, cmp_v16i8); } -void PPULLVMRecompiler::VCMPGTSB_(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPGTSB_(u32 vd, u32 va, u32 vb) { VCMPGTSB(vd, va, vb); SetCr6AfterVectorCompare(vd); } -void PPULLVMRecompiler::VCMPGTSH(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPGTSH(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto cmp_v8i1 = m_ir_builder->CreateICmpSGT(va_v8i16, vb_v8i16); @@ -599,12 +581,12 @@ void PPULLVMRecompiler::VCMPGTSH(u32 vd, u32 va, u32 vb) { SetVr(vd, cmp_v8i16); } -void PPULLVMRecompiler::VCMPGTSH_(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPGTSH_(u32 vd, u32 va, u32 vb) { VCMPGTSH(vd, va, vb); SetCr6AfterVectorCompare(vd); } -void PPULLVMRecompiler::VCMPGTSW(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPGTSW(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); auto cmp_v4i1 = 
m_ir_builder->CreateICmpSGT(va_v4i32, vb_v4i32); @@ -612,12 +594,12 @@ void PPULLVMRecompiler::VCMPGTSW(u32 vd, u32 va, u32 vb) { SetVr(vd, cmp_v4i32); } -void PPULLVMRecompiler::VCMPGTSW_(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPGTSW_(u32 vd, u32 va, u32 vb) { VCMPGTSW(vd, va, vb); SetCr6AfterVectorCompare(vd); } -void PPULLVMRecompiler::VCMPGTUB(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPGTUB(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto cmp_v16i1 = m_ir_builder->CreateICmpUGT(va_v16i8, vb_v16i8); @@ -625,12 +607,12 @@ void PPULLVMRecompiler::VCMPGTUB(u32 vd, u32 va, u32 vb) { SetVr(vd, cmp_v16i8); } -void PPULLVMRecompiler::VCMPGTUB_(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPGTUB_(u32 vd, u32 va, u32 vb) { VCMPGTUB(vd, va, vb); SetCr6AfterVectorCompare(vd); } -void PPULLVMRecompiler::VCMPGTUH(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPGTUH(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto cmp_v8i1 = m_ir_builder->CreateICmpUGT(va_v8i16, vb_v8i16); @@ -638,12 +620,12 @@ void PPULLVMRecompiler::VCMPGTUH(u32 vd, u32 va, u32 vb) { SetVr(vd, cmp_v8i16); } -void PPULLVMRecompiler::VCMPGTUH_(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPGTUH_(u32 vd, u32 va, u32 vb) { VCMPGTUH(vd, va, vb); SetCr6AfterVectorCompare(vd); } -void PPULLVMRecompiler::VCMPGTUW(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPGTUW(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); auto cmp_v4i1 = m_ir_builder->CreateICmpUGT(va_v4i32, vb_v4i32); @@ -651,28 +633,28 @@ void PPULLVMRecompiler::VCMPGTUW(u32 vd, u32 va, u32 vb) { SetVr(vd, cmp_v4i32); } -void PPULLVMRecompiler::VCMPGTUW_(u32 vd, u32 va, u32 vb) { +void Compiler::VCMPGTUW_(u32 vd, u32 va, u32 vb) { VCMPGTUW(vd, va, vb); SetCr6AfterVectorCompare(vd); } -void PPULLVMRecompiler::VCTSXS(u32 vd, u32 uimm5, u32 vb) { +void Compiler::VCTSXS(u32 vd, u32 
uimm5, u32 vb) { InterpreterCall("VCTSXS", &PPUInterpreter::VCTSXS, vd, uimm5, vb); } -void PPULLVMRecompiler::VCTUXS(u32 vd, u32 uimm5, u32 vb) { +void Compiler::VCTUXS(u32 vd, u32 uimm5, u32 vb) { InterpreterCall("VCTUXS", &PPUInterpreter::VCTUXS, vd, uimm5, vb); } -void PPULLVMRecompiler::VEXPTEFP(u32 vd, u32 vb) { +void Compiler::VEXPTEFP(u32 vd, u32 vb) { InterpreterCall("VEXPTEFP", &PPUInterpreter::VEXPTEFP, vd, vb); } -void PPULLVMRecompiler::VLOGEFP(u32 vd, u32 vb) { +void Compiler::VLOGEFP(u32 vd, u32 vb) { InterpreterCall("VLOGEFP", &PPUInterpreter::VLOGEFP, vd, vb); } -void PPULLVMRecompiler::VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) { +void Compiler::VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) { auto va_v4f32 = GetVrAsFloatVec(va); auto vb_v4f32 = GetVrAsFloatVec(vb); auto vc_v4f32 = GetVrAsFloatVec(vc); @@ -680,117 +662,117 @@ void PPULLVMRecompiler::VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) { SetVr(vd, res_v4f32); } -void PPULLVMRecompiler::VMAXFP(u32 vd, u32 va, u32 vb) { +void Compiler::VMAXFP(u32 vd, u32 va, u32 vb) { auto va_v4f32 = GetVrAsFloatVec(va); auto vb_v4f32 = GetVrAsFloatVec(vb); auto res_v4f32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_max_ps), va_v4f32, vb_v4f32); SetVr(vd, res_v4f32); } -void PPULLVMRecompiler::VMAXSB(u32 vd, u32 va, u32 vb) { +void Compiler::VMAXSB(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxsb), va_v16i8, vb_v16i8); SetVr(vd, res_v16i8); } -void PPULLVMRecompiler::VMAXSH(u32 vd, u32 va, u32 vb) { +void Compiler::VMAXSH(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmaxs_w), va_v8i16, vb_v8i16); SetVr(vd, res_v8i16); } -void PPULLVMRecompiler::VMAXSW(u32 vd, u32 va, u32 
vb) { +void Compiler::VMAXSW(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxsd), va_v4i32, vb_v4i32); SetVr(vd, res_v4i32); } -void PPULLVMRecompiler::VMAXUB(u32 vd, u32 va, u32 vb) { +void Compiler::VMAXUB(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmaxu_b), va_v16i8, vb_v16i8); SetVr(vd, res_v16i8); } -void PPULLVMRecompiler::VMAXUH(u32 vd, u32 va, u32 vb) { +void Compiler::VMAXUH(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxuw), va_v8i16, vb_v8i16); SetVr(vd, res_v8i16); } -void PPULLVMRecompiler::VMAXUW(u32 vd, u32 va, u32 vb) { +void Compiler::VMAXUW(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxud), va_v4i32, vb_v4i32); SetVr(vd, res_v4i32); } -void PPULLVMRecompiler::VMHADDSHS(u32 vd, u32 va, u32 vb, u32 vc) { +void Compiler::VMHADDSHS(u32 vd, u32 va, u32 vb, u32 vc) { InterpreterCall("VMHADDSHS", &PPUInterpreter::VMHADDSHS, vd, va, vb, vc); } -void PPULLVMRecompiler::VMHRADDSHS(u32 vd, u32 va, u32 vb, u32 vc) { +void Compiler::VMHRADDSHS(u32 vd, u32 va, u32 vb, u32 vc) { InterpreterCall("VMHRADDSHS", &PPUInterpreter::VMHRADDSHS, vd, va, vb, vc); } -void PPULLVMRecompiler::VMINFP(u32 vd, u32 va, u32 vb) { +void Compiler::VMINFP(u32 vd, u32 va, u32 vb) { auto va_v4f32 = GetVrAsFloatVec(va); auto vb_v4f32 = GetVrAsFloatVec(vb); auto res_v4f32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, 
Intrinsic::x86_sse_min_ps), va_v4f32, vb_v4f32); SetVr(vd, res_v4f32); } -void PPULLVMRecompiler::VMINSB(u32 vd, u32 va, u32 vb) { +void Compiler::VMINSB(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminsb), va_v16i8, vb_v16i8); SetVr(vd, res_v16i8); } -void PPULLVMRecompiler::VMINSH(u32 vd, u32 va, u32 vb) { +void Compiler::VMINSH(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmins_w), va_v8i16, vb_v8i16); SetVr(vd, res_v8i16); } -void PPULLVMRecompiler::VMINSW(u32 vd, u32 va, u32 vb) { +void Compiler::VMINSW(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminsd), va_v4i32, vb_v4i32); SetVr(vd, res_v4i32); } -void PPULLVMRecompiler::VMINUB(u32 vd, u32 va, u32 vb) { +void Compiler::VMINUB(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pminu_b), va_v16i8, vb_v16i8); SetVr(vd, res_v16i8); } -void PPULLVMRecompiler::VMINUH(u32 vd, u32 va, u32 vb) { +void Compiler::VMINUH(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminuw), va_v8i16, vb_v8i16); SetVr(vd, res_v8i16); } -void PPULLVMRecompiler::VMINUW(u32 vd, u32 va, u32 vb) { +void Compiler::VMINUW(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); auto res_v4i32 = 
m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminud), va_v4i32, vb_v4i32); SetVr(vd, res_v4i32); } -void PPULLVMRecompiler::VMLADDUHM(u32 vd, u32 va, u32 vb, u32 vc) { +void Compiler::VMLADDUHM(u32 vd, u32 va, u32 vb, u32 vc) { InterpreterCall("VMLADDUHM", &PPUInterpreter::VMLADDUHM, vd, va, vb, vc); } -void PPULLVMRecompiler::VMRGHB(u32 vd, u32 va, u32 vb) { +void Compiler::VMRGHB(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); u32 mask_v16i32[16] = {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}; @@ -798,7 +780,7 @@ void PPULLVMRecompiler::VMRGHB(u32 vd, u32 va, u32 vb) { SetVr(vd, vd_v16i8); } -void PPULLVMRecompiler::VMRGHH(u32 vd, u32 va, u32 vb) { +void Compiler::VMRGHH(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); u32 mask_v8i32[8] = {12, 4, 13, 5, 14, 6, 15, 7}; @@ -806,7 +788,7 @@ void PPULLVMRecompiler::VMRGHH(u32 vd, u32 va, u32 vb) { SetVr(vd, vd_v8i16); } -void PPULLVMRecompiler::VMRGHW(u32 vd, u32 va, u32 vb) { +void Compiler::VMRGHW(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); u32 mask_v4i32[4] = {6, 2, 7, 3}; @@ -814,7 +796,7 @@ void PPULLVMRecompiler::VMRGHW(u32 vd, u32 va, u32 vb) { SetVr(vd, vd_v4i32); } -void PPULLVMRecompiler::VMRGLB(u32 vd, u32 va, u32 vb) { +void Compiler::VMRGLB(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); u32 mask_v16i32[16] = {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}; @@ -822,7 +804,7 @@ void PPULLVMRecompiler::VMRGLB(u32 vd, u32 va, u32 vb) { SetVr(vd, vd_v16i8); } -void PPULLVMRecompiler::VMRGLH(u32 vd, u32 va, u32 vb) { +void Compiler::VMRGLH(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); u32 mask_v8i32[8] = {8, 0, 9, 1, 10, 2, 11, 3}; @@ -830,7 +812,7 @@ void 
PPULLVMRecompiler::VMRGLH(u32 vd, u32 va, u32 vb) { SetVr(vd, vd_v8i16); } -void PPULLVMRecompiler::VMRGLW(u32 vd, u32 va, u32 vb) { +void Compiler::VMRGLW(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); u32 mask_v4i32[4] = {4, 0, 5, 1}; @@ -838,7 +820,7 @@ void PPULLVMRecompiler::VMRGLW(u32 vd, u32 va, u32 vb) { SetVr(vd, vd_v4i32); } -void PPULLVMRecompiler::VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) { +void Compiler::VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto va_v16i16 = m_ir_builder->CreateSExt(va_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); @@ -870,7 +852,7 @@ void PPULLVMRecompiler::VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) { // TODO: Try to optimize with horizontal add } -void PPULLVMRecompiler::VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) { +void Compiler::VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto vc_v4i32 = GetVrAsIntVec(vc, 32); @@ -879,11 +861,11 @@ void PPULLVMRecompiler::VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) { SetVr(vd, res_v4i32); } -void PPULLVMRecompiler::VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) { +void Compiler::VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) { InterpreterCall("VMSUMSHS", &PPUInterpreter::VMSUMSHS, vd, va, vb, vc); } -void PPULLVMRecompiler::VMSUMUBM(u32 vd, u32 va, u32 vb, u32 vc) { +void Compiler::VMSUMUBM(u32 vd, u32 va, u32 vb, u32 vc) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto va_v16i16 = m_ir_builder->CreateZExt(va_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); @@ -915,7 +897,7 @@ void PPULLVMRecompiler::VMSUMUBM(u32 vd, u32 va, u32 vb, u32 vc) { // TODO: Try to optimize with horizontal add } -void PPULLVMRecompiler::VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) { +void Compiler::VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) { auto va_v8i16 = GetVrAsIntVec(va, 16); 
auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto va_v8i32 = m_ir_builder->CreateZExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); @@ -937,43 +919,43 @@ void PPULLVMRecompiler::VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) { // TODO: Try to optimize with horizontal add } -void PPULLVMRecompiler::VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) { +void Compiler::VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) { InterpreterCall("VMSUMUHS", &PPUInterpreter::VMSUMUHS, vd, va, vb, vc); } -void PPULLVMRecompiler::VMULESB(u32 vd, u32 va, u32 vb) { +void Compiler::VMULESB(u32 vd, u32 va, u32 vb) { InterpreterCall("VMULESB", &PPUInterpreter::VMULESB, vd, va, vb); } -void PPULLVMRecompiler::VMULESH(u32 vd, u32 va, u32 vb) { +void Compiler::VMULESH(u32 vd, u32 va, u32 vb) { InterpreterCall("VMULESH", &PPUInterpreter::VMULESH, vd, va, vb); } -void PPULLVMRecompiler::VMULEUB(u32 vd, u32 va, u32 vb) { +void Compiler::VMULEUB(u32 vd, u32 va, u32 vb) { InterpreterCall("VMULEUB", &PPUInterpreter::VMULEUB, vd, va, vb); } -void PPULLVMRecompiler::VMULEUH(u32 vd, u32 va, u32 vb) { +void Compiler::VMULEUH(u32 vd, u32 va, u32 vb) { InterpreterCall("VMULEUH", &PPUInterpreter::VMULEUH, vd, va, vb); } -void PPULLVMRecompiler::VMULOSB(u32 vd, u32 va, u32 vb) { +void Compiler::VMULOSB(u32 vd, u32 va, u32 vb) { InterpreterCall("VMULOSB", &PPUInterpreter::VMULOSB, vd, va, vb); } -void PPULLVMRecompiler::VMULOSH(u32 vd, u32 va, u32 vb) { +void Compiler::VMULOSH(u32 vd, u32 va, u32 vb) { InterpreterCall("VMULOSH", &PPUInterpreter::VMULOSH, vd, va, vb); } -void PPULLVMRecompiler::VMULOUB(u32 vd, u32 va, u32 vb) { +void Compiler::VMULOUB(u32 vd, u32 va, u32 vb) { InterpreterCall("VMULOUB", &PPUInterpreter::VMULOUB, vd, va, vb); } -void PPULLVMRecompiler::VMULOUH(u32 vd, u32 va, u32 vb) { +void Compiler::VMULOUH(u32 vd, u32 va, u32 vb) { InterpreterCall("VMULOUH", &PPUInterpreter::VMULOUH, vd, va, vb); } -void PPULLVMRecompiler::VNMSUBFP(u32 vd, u32 va, u32 vc, u32 vb) { +void Compiler::VNMSUBFP(u32 vd, u32 
va, u32 vc, u32 vb) { auto va_v4f32 = GetVrAsFloatVec(va); auto vb_v4f32 = GetVrAsFloatVec(vb); auto vc_v4f32 = GetVrAsFloatVec(vc); @@ -982,7 +964,7 @@ void PPULLVMRecompiler::VNMSUBFP(u32 vd, u32 va, u32 vc, u32 vb) { SetVr(vd, res_v4f32); } -void PPULLVMRecompiler::VNOR(u32 vd, u32 va, u32 vb) { +void Compiler::VNOR(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto res_v8i16 = m_ir_builder->CreateOr(va_v8i16, vb_v8i16); @@ -990,14 +972,14 @@ void PPULLVMRecompiler::VNOR(u32 vd, u32 va, u32 vb) { SetVr(vd, res_v8i16); } -void PPULLVMRecompiler::VOR(u32 vd, u32 va, u32 vb) { +void Compiler::VOR(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto res_v8i16 = m_ir_builder->CreateOr(va_v8i16, vb_v8i16); SetVr(vd, res_v8i16); } -void PPULLVMRecompiler::VPERM(u32 vd, u32 va, u32 vb, u32 vc) { +void Compiler::VPERM(u32 vd, u32 va, u32 vb, u32 vc) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto vc_v16i8 = GetVrAsIntVec(vc, 8); @@ -1019,81 +1001,81 @@ void PPULLVMRecompiler::VPERM(u32 vd, u32 va, u32 vb, u32 vc) { SetVr(vd, res_v16i8); } -void PPULLVMRecompiler::VPKPX(u32 vd, u32 va, u32 vb) { +void Compiler::VPKPX(u32 vd, u32 va, u32 vb) { InterpreterCall("VPKPX", &PPUInterpreter::VPKPX, vd, va, vb); } -void PPULLVMRecompiler::VPKSHSS(u32 vd, u32 va, u32 vb) { +void Compiler::VPKSHSS(u32 vd, u32 va, u32 vb) { InterpreterCall("VPKSHSS", &PPUInterpreter::VPKSHSS, vd, va, vb); } -void PPULLVMRecompiler::VPKSHUS(u32 vd, u32 va, u32 vb) { +void Compiler::VPKSHUS(u32 vd, u32 va, u32 vb) { InterpreterCall("VPKSHUS", &PPUInterpreter::VPKSHUS, vd, va, vb); } -void PPULLVMRecompiler::VPKSWSS(u32 vd, u32 va, u32 vb) { +void Compiler::VPKSWSS(u32 vd, u32 va, u32 vb) { InterpreterCall("VPKSWSS", &PPUInterpreter::VPKSWSS, vd, va, vb); } -void PPULLVMRecompiler::VPKSWUS(u32 vd, u32 va, u32 vb) { +void Compiler::VPKSWUS(u32 vd, u32 va, 
u32 vb) { InterpreterCall("VPKSWUS", &PPUInterpreter::VPKSWUS, vd, va, vb); } -void PPULLVMRecompiler::VPKUHUM(u32 vd, u32 va, u32 vb) { +void Compiler::VPKUHUM(u32 vd, u32 va, u32 vb) { InterpreterCall("VPKUHUM", &PPUInterpreter::VPKUHUM, vd, va, vb); } -void PPULLVMRecompiler::VPKUHUS(u32 vd, u32 va, u32 vb) { +void Compiler::VPKUHUS(u32 vd, u32 va, u32 vb) { InterpreterCall("VPKUHUS", &PPUInterpreter::VPKUHUS, vd, va, vb); } -void PPULLVMRecompiler::VPKUWUM(u32 vd, u32 va, u32 vb) { +void Compiler::VPKUWUM(u32 vd, u32 va, u32 vb) { InterpreterCall("VPKUWUM", &PPUInterpreter::VPKUWUM, vd, va, vb); } -void PPULLVMRecompiler::VPKUWUS(u32 vd, u32 va, u32 vb) { +void Compiler::VPKUWUS(u32 vd, u32 va, u32 vb) { InterpreterCall("VPKUWUS", &PPUInterpreter::VPKUWUS, vd, va, vb); } -void PPULLVMRecompiler::VREFP(u32 vd, u32 vb) { +void Compiler::VREFP(u32 vd, u32 vb) { auto vb_v4f32 = GetVrAsFloatVec(vb); auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_rcp_ps), vb_v4f32); SetVr(vd, res_v4f32); } -void PPULLVMRecompiler::VRFIM(u32 vd, u32 vb) { +void Compiler::VRFIM(u32 vd, u32 vb) { InterpreterCall("VRFIM", &PPUInterpreter::VRFIM, vd, vb); } -void PPULLVMRecompiler::VRFIN(u32 vd, u32 vb) { +void Compiler::VRFIN(u32 vd, u32 vb) { InterpreterCall("VRFIN", &PPUInterpreter::VRFIN, vd, vb); } -void PPULLVMRecompiler::VRFIP(u32 vd, u32 vb) { +void Compiler::VRFIP(u32 vd, u32 vb) { InterpreterCall("VRFIP", &PPUInterpreter::VRFIP, vd, vb); } -void PPULLVMRecompiler::VRFIZ(u32 vd, u32 vb) { +void Compiler::VRFIZ(u32 vd, u32 vb) { InterpreterCall("VRFIZ", &PPUInterpreter::VRFIZ, vd, vb); } -void PPULLVMRecompiler::VRLB(u32 vd, u32 va, u32 vb) { +void Compiler::VRLB(u32 vd, u32 va, u32 vb) { InterpreterCall("VRLB", &PPUInterpreter::VRLB, vd, va, vb); } -void PPULLVMRecompiler::VRLH(u32 vd, u32 va, u32 vb) { +void Compiler::VRLH(u32 vd, u32 va, u32 vb) { InterpreterCall("VRLH", &PPUInterpreter::VRLH, vd, va, vb); } -void 
PPULLVMRecompiler::VRLW(u32 vd, u32 va, u32 vb) { +void Compiler::VRLW(u32 vd, u32 va, u32 vb) { InterpreterCall("VRLW", &PPUInterpreter::VRLW, vd, va, vb); } -void PPULLVMRecompiler::VRSQRTEFP(u32 vd, u32 vb) { +void Compiler::VRSQRTEFP(u32 vd, u32 vb) { InterpreterCall("VRSQRTEFP", &PPUInterpreter::VRSQRTEFP, vd, vb); } -void PPULLVMRecompiler::VSEL(u32 vd, u32 va, u32 vb, u32 vc) { +void Compiler::VSEL(u32 vd, u32 va, u32 vb, u32 vc) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); auto vc_v4i32 = GetVrAsIntVec(vc, 32); @@ -1104,7 +1086,7 @@ void PPULLVMRecompiler::VSEL(u32 vd, u32 va, u32 vb, u32 vc) { SetVr(vd, vd_v4i32); } -void PPULLVMRecompiler::VSL(u32 vd, u32 va, u32 vb) { +void Compiler::VSL(u32 vd, u32 va, u32 vb) { auto va_i128 = GetVr(va); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); @@ -1114,7 +1096,7 @@ void PPULLVMRecompiler::VSL(u32 vd, u32 va, u32 vb) { SetVr(vd, va_i128); } -void PPULLVMRecompiler::VSLB(u32 vd, u32 va, u32 vb) { +void Compiler::VSLB(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(0x7))); @@ -1122,7 +1104,7 @@ void PPULLVMRecompiler::VSLB(u32 vd, u32 va, u32 vb) { SetVr(vd, res_v16i8); } -void PPULLVMRecompiler::VSLDOI(u32 vd, u32 va, u32 vb, u32 sh) { +void Compiler::VSLDOI(u32 vd, u32 va, u32 vb, u32 sh) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); sh = 16 - sh; @@ -1131,7 +1113,7 @@ void PPULLVMRecompiler::VSLDOI(u32 vd, u32 va, u32 vb, u32 sh) { SetVr(vd, vd_v16i8); } -void PPULLVMRecompiler::VSLH(u32 vd, u32 va, u32 vb) { +void Compiler::VSLH(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, 
m_ir_builder->getInt16(0xF))); @@ -1139,7 +1121,7 @@ void PPULLVMRecompiler::VSLH(u32 vd, u32 va, u32 vb) { SetVr(vd, res_v8i16); } -void PPULLVMRecompiler::VSLO(u32 vd, u32 va, u32 vb) { +void Compiler::VSLO(u32 vd, u32 va, u32 vb) { auto va_i128 = GetVr(va); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); @@ -1149,7 +1131,7 @@ void PPULLVMRecompiler::VSLO(u32 vd, u32 va, u32 vb) { SetVr(vd, va_i128); } -void PPULLVMRecompiler::VSLW(u32 vd, u32 va, u32 vb) { +void Compiler::VSLW(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); @@ -1157,7 +1139,7 @@ void PPULLVMRecompiler::VSLW(u32 vd, u32 va, u32 vb) { SetVr(vd, res_v4i32); } -void PPULLVMRecompiler::VSPLTB(u32 vd, u32 uimm5, u32 vb) { +void Compiler::VSPLTB(u32 vd, u32 uimm5, u32 vb) { auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto undef_v16i8 = UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)); auto mask_v16i32 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt32(15 - uimm5)); @@ -1165,7 +1147,7 @@ void PPULLVMRecompiler::VSPLTB(u32 vd, u32 uimm5, u32 vb) { SetVr(vd, res_v16i8); } -void PPULLVMRecompiler::VSPLTH(u32 vd, u32 uimm5, u32 vb) { +void Compiler::VSPLTH(u32 vd, u32 uimm5, u32 vb) { auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto undef_v8i16 = UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)); auto mask_v8i32 = m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt32(7 - uimm5)); @@ -1173,22 +1155,22 @@ void PPULLVMRecompiler::VSPLTH(u32 vd, u32 uimm5, u32 vb) { SetVr(vd, res_v8i16); } -void PPULLVMRecompiler::VSPLTISB(u32 vd, s32 simm5) { +void Compiler::VSPLTISB(u32 vd, s32 simm5) { auto vd_v16i8 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8((s8)simm5)); SetVr(vd, vd_v16i8); } -void PPULLVMRecompiler::VSPLTISH(u32 vd, s32 
simm5) { +void Compiler::VSPLTISH(u32 vd, s32 simm5) { auto vd_v8i16 = m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16((s16)simm5)); SetVr(vd, vd_v8i16); } -void PPULLVMRecompiler::VSPLTISW(u32 vd, s32 simm5) { +void Compiler::VSPLTISW(u32 vd, s32 simm5) { auto vd_v4i32 = m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32((s32)simm5)); SetVr(vd, vd_v4i32); } -void PPULLVMRecompiler::VSPLTW(u32 vd, u32 uimm5, u32 vb) { +void Compiler::VSPLTW(u32 vd, u32 uimm5, u32 vb) { auto vb_v4i32 = GetVrAsIntVec(vb, 32); auto undef_v4i32 = UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 4)); auto mask_v4i32 = m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3 - uimm5)); @@ -1196,7 +1178,7 @@ void PPULLVMRecompiler::VSPLTW(u32 vd, u32 uimm5, u32 vb) { SetVr(vd, res_v4i32); } -void PPULLVMRecompiler::VSR(u32 vd, u32 va, u32 vb) { +void Compiler::VSR(u32 vd, u32 va, u32 vb) { auto va_i128 = GetVr(va); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); @@ -1206,7 +1188,7 @@ void PPULLVMRecompiler::VSR(u32 vd, u32 va, u32 vb) { SetVr(vd, va_i128); } -void PPULLVMRecompiler::VSRAB(u32 vd, u32 va, u32 vb) { +void Compiler::VSRAB(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(0x7))); @@ -1214,7 +1196,7 @@ void PPULLVMRecompiler::VSRAB(u32 vd, u32 va, u32 vb) { SetVr(vd, res_v16i8); } -void PPULLVMRecompiler::VSRAH(u32 vd, u32 va, u32 vb) { +void Compiler::VSRAH(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF))); @@ -1222,7 +1204,7 @@ void PPULLVMRecompiler::VSRAH(u32 vd, u32 va, u32 vb) { SetVr(vd, res_v8i16); } -void PPULLVMRecompiler::VSRAW(u32 vd, u32 va, u32 
vb) { +void Compiler::VSRAW(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); @@ -1230,7 +1212,7 @@ void PPULLVMRecompiler::VSRAW(u32 vd, u32 va, u32 vb) { SetVr(vd, res_v4i32); } -void PPULLVMRecompiler::VSRB(u32 vd, u32 va, u32 vb) { +void Compiler::VSRB(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(0x7))); @@ -1238,7 +1220,7 @@ void PPULLVMRecompiler::VSRB(u32 vd, u32 va, u32 vb) { SetVr(vd, res_v16i8); } -void PPULLVMRecompiler::VSRH(u32 vd, u32 va, u32 vb) { +void Compiler::VSRH(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF))); @@ -1246,7 +1228,7 @@ void PPULLVMRecompiler::VSRH(u32 vd, u32 va, u32 vb) { SetVr(vd, res_v8i16); } -void PPULLVMRecompiler::VSRO(u32 vd, u32 va, u32 vb) { +void Compiler::VSRO(u32 vd, u32 va, u32 vb) { auto va_i128 = GetVr(va); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); @@ -1256,7 +1238,7 @@ void PPULLVMRecompiler::VSRO(u32 vd, u32 va, u32 vb) { SetVr(vd, va_i128); } -void PPULLVMRecompiler::VSRW(u32 vd, u32 va, u32 vb) { +void Compiler::VSRW(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); @@ -1264,7 +1246,7 @@ void PPULLVMRecompiler::VSRW(u32 vd, u32 va, u32 vb) { SetVr(vd, res_v4i32); } -void PPULLVMRecompiler::VSUBCUW(u32 vd, u32 va, u32 vb) { +void Compiler::VSUBCUW(u32 vd, u32 va, u32 vb) { auto va_v4i32 = 
GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); @@ -1273,14 +1255,14 @@ void PPULLVMRecompiler::VSUBCUW(u32 vd, u32 va, u32 vb) { SetVr(vd, cmpv4i32); } -void PPULLVMRecompiler::VSUBFP(u32 vd, u32 va, u32 vb) { +void Compiler::VSUBFP(u32 vd, u32 va, u32 vb) { auto va_v4f32 = GetVrAsFloatVec(va); auto vb_v4f32 = GetVrAsFloatVec(vb); auto diff_v4f32 = m_ir_builder->CreateFSub(va_v4f32, vb_v4f32); SetVr(vd, diff_v4f32); } -void PPULLVMRecompiler::VSUBSBS(u32 vd, u32 va, u32 vb) { +void Compiler::VSUBSBS(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto diff_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubs_b), va_v16i8, vb_v16i8); @@ -1289,7 +1271,7 @@ void PPULLVMRecompiler::VSUBSBS(u32 vd, u32 va, u32 vb) { // TODO: Set VSCR.SAT } -void PPULLVMRecompiler::VSUBSHS(u32 vd, u32 va, u32 vb) { +void Compiler::VSUBSHS(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto diff_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubs_w), va_v8i16, vb_v8i16); @@ -1298,7 +1280,7 @@ void PPULLVMRecompiler::VSUBSHS(u32 vd, u32 va, u32 vb) { // TODO: Set VSCR.SAT } -void PPULLVMRecompiler::VSUBSWS(u32 vd, u32 va, u32 vb) { +void Compiler::VSUBSWS(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); @@ -1329,14 +1311,14 @@ void PPULLVMRecompiler::VSUBSWS(u32 vd, u32 va, u32 vb) { // TODO: Set SAT } -void PPULLVMRecompiler::VSUBUBM(u32 vd, u32 va, u32 vb) { +void Compiler::VSUBUBM(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto vb_v16i8 = GetVrAsIntVec(vb, 8); auto diff_v16i8 = m_ir_builder->CreateSub(va_v16i8, vb_v16i8); SetVr(vd, diff_v16i8); } -void PPULLVMRecompiler::VSUBUBS(u32 vd, u32 va, u32 vb) { +void Compiler::VSUBUBS(u32 vd, u32 va, u32 vb) { auto va_v16i8 = GetVrAsIntVec(va, 8); auto 
vb_v16i8 = GetVrAsIntVec(vb, 8); auto diff_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubus_b), va_v16i8, vb_v16i8); @@ -1345,14 +1327,14 @@ void PPULLVMRecompiler::VSUBUBS(u32 vd, u32 va, u32 vb) { // TODO: Set SAT } -void PPULLVMRecompiler::VSUBUHM(u32 vd, u32 va, u32 vb) { +void Compiler::VSUBUHM(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto diff_v8i16 = m_ir_builder->CreateSub(va_v8i16, vb_v8i16); SetVr(vd, diff_v8i16); } -void PPULLVMRecompiler::VSUBUHS(u32 vd, u32 va, u32 vb) { +void Compiler::VSUBUHS(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto diff_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubus_w), va_v8i16, vb_v8i16); @@ -1361,14 +1343,14 @@ void PPULLVMRecompiler::VSUBUHS(u32 vd, u32 va, u32 vb) { // TODO: Set SAT } -void PPULLVMRecompiler::VSUBUWM(u32 vd, u32 va, u32 vb) { +void Compiler::VSUBUWM(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); auto diff_v4i32 = m_ir_builder->CreateSub(va_v4i32, vb_v4i32); SetVr(vd, diff_v4i32); } -void PPULLVMRecompiler::VSUBUWS(u32 vd, u32 va, u32 vb) { +void Compiler::VSUBUWS(u32 vd, u32 va, u32 vb) { auto va_v4i32 = GetVrAsIntVec(va, 32); auto vb_v4i32 = GetVrAsIntVec(vb, 32); auto diff_v4i32 = m_ir_builder->CreateSub(va_v4i32, vb_v4i32); @@ -1380,65 +1362,65 @@ void PPULLVMRecompiler::VSUBUWS(u32 vd, u32 va, u32 vb) { // TODO: Set SAT } -void PPULLVMRecompiler::VSUMSWS(u32 vd, u32 va, u32 vb) { +void Compiler::VSUMSWS(u32 vd, u32 va, u32 vb) { InterpreterCall("VSUMSWS", &PPUInterpreter::VSUMSWS, vd, va, vb); } -void PPULLVMRecompiler::VSUM2SWS(u32 vd, u32 va, u32 vb) { +void Compiler::VSUM2SWS(u32 vd, u32 va, u32 vb) { InterpreterCall("VSUM2SWS", &PPUInterpreter::VSUM2SWS, vd, va, vb); } -void PPULLVMRecompiler::VSUM4SBS(u32 vd, u32 va, u32 
vb) { +void Compiler::VSUM4SBS(u32 vd, u32 va, u32 vb) { InterpreterCall("VSUM4SBS", &PPUInterpreter::VSUM4SBS, vd, va, vb); } -void PPULLVMRecompiler::VSUM4SHS(u32 vd, u32 va, u32 vb) { +void Compiler::VSUM4SHS(u32 vd, u32 va, u32 vb) { InterpreterCall("VSUM4SHS", &PPUInterpreter::VSUM4SHS, vd, va, vb); } -void PPULLVMRecompiler::VSUM4UBS(u32 vd, u32 va, u32 vb) { +void Compiler::VSUM4UBS(u32 vd, u32 va, u32 vb) { InterpreterCall("VSUM4UBS", &PPUInterpreter::VSUM4UBS, vd, va, vb); } -void PPULLVMRecompiler::VUPKHPX(u32 vd, u32 vb) { +void Compiler::VUPKHPX(u32 vd, u32 vb) { InterpreterCall("VUPKHPX", &PPUInterpreter::VUPKHPX, vd, vb); } -void PPULLVMRecompiler::VUPKHSB(u32 vd, u32 vb) { +void Compiler::VUPKHSB(u32 vd, u32 vb) { InterpreterCall("VUPKHSB", &PPUInterpreter::VUPKHSB, vd, vb); } -void PPULLVMRecompiler::VUPKHSH(u32 vd, u32 vb) { +void Compiler::VUPKHSH(u32 vd, u32 vb) { InterpreterCall("VUPKHSH", &PPUInterpreter::VUPKHSH, vd, vb); } -void PPULLVMRecompiler::VUPKLPX(u32 vd, u32 vb) { +void Compiler::VUPKLPX(u32 vd, u32 vb) { InterpreterCall("VUPKLPX", &PPUInterpreter::VUPKLPX, vd, vb); } -void PPULLVMRecompiler::VUPKLSB(u32 vd, u32 vb) { +void Compiler::VUPKLSB(u32 vd, u32 vb) { InterpreterCall("VUPKLSB", &PPUInterpreter::VUPKLSB, vd, vb); } -void PPULLVMRecompiler::VUPKLSH(u32 vd, u32 vb) { +void Compiler::VUPKLSH(u32 vd, u32 vb) { InterpreterCall("VUPKLSH", &PPUInterpreter::VUPKLSH, vd, vb); } -void PPULLVMRecompiler::VXOR(u32 vd, u32 va, u32 vb) { +void Compiler::VXOR(u32 vd, u32 va, u32 vb) { auto va_v8i16 = GetVrAsIntVec(va, 16); auto vb_v8i16 = GetVrAsIntVec(vb, 16); auto res_v8i16 = m_ir_builder->CreateXor(va_v8i16, vb_v8i16); SetVr(vd, res_v8i16); } -void PPULLVMRecompiler::MULLI(u32 rd, u32 ra, s32 simm16) { +void Compiler::MULLI(u32 rd, u32 ra, s32 simm16) { auto ra_i64 = GetGpr(ra); auto res_i64 = m_ir_builder->CreateMul(ra_i64, m_ir_builder->getInt64((s64)simm16)); SetGpr(rd, res_i64); //InterpreterCall("MULLI", &PPUInterpreter::MULLI, rd, 
ra, simm16); } -void PPULLVMRecompiler::SUBFIC(u32 rd, u32 ra, s32 simm16) { +void Compiler::SUBFIC(u32 rd, u32 ra, s32 simm16) { auto ra_i64 = GetGpr(ra); ra_i64 = m_ir_builder->CreateNeg(ra_i64); auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, m_ir_builder->getInt64((s64)simm16)); @@ -1449,7 +1431,7 @@ void PPULLVMRecompiler::SUBFIC(u32 rd, u32 ra, s32 simm16) { //InterpreterCall("SUBFIC", &PPUInterpreter::SUBFIC, rd, ra, simm16); } -void PPULLVMRecompiler::CMPLI(u32 crfd, u32 l, u32 ra, u32 uimm16) { +void Compiler::CMPLI(u32 crfd, u32 l, u32 ra, u32 uimm16) { Value * ra_i64; if (l == 0) { ra_i64 = m_ir_builder->CreateZExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty()); @@ -1461,7 +1443,7 @@ void PPULLVMRecompiler::CMPLI(u32 crfd, u32 l, u32 ra, u32 uimm16) { //InterpreterCall("CMPLI", &PPUInterpreter::CMPLI, crfd, l, ra, uimm16); } -void PPULLVMRecompiler::CMPI(u32 crfd, u32 l, u32 ra, s32 simm16) { +void Compiler::CMPI(u32 crfd, u32 l, u32 ra, s32 simm16) { Value * ra_i64; if (l == 0) { ra_i64 = m_ir_builder->CreateSExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty()); @@ -1473,7 +1455,7 @@ void PPULLVMRecompiler::CMPI(u32 crfd, u32 l, u32 ra, s32 simm16) { //InterpreterCall("CMPI", &PPUInterpreter::CMPI, crfd, l, ra, simm16); } -void PPULLVMRecompiler::ADDIC(u32 rd, u32 ra, s32 simm16) { +void Compiler::ADDIC(u32 rd, u32 ra, s32 simm16) { auto ra_i64 = GetGpr(ra); auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), m_ir_builder->getInt64((s64)simm16), ra_i64); auto sum_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); @@ -1483,13 +1465,13 @@ void PPULLVMRecompiler::ADDIC(u32 rd, u32 ra, s32 simm16) { //InterpreterCall("ADDIC", &PPUInterpreter::ADDIC, rd, ra, simm16); } -void PPULLVMRecompiler::ADDIC_(u32 rd, u32 ra, s32 simm16) { +void Compiler::ADDIC_(u32 rd, u32 ra, s32 simm16) { 
ADDIC(rd, ra, simm16); SetCrFieldSignedCmp(0, GetGpr(rd), m_ir_builder->getInt64(0)); //InterpreterCall("ADDIC_", &PPUInterpreter::ADDIC_, rd, ra, simm16); } -void PPULLVMRecompiler::ADDI(u32 rd, u32 ra, s32 simm16) { +void Compiler::ADDI(u32 rd, u32 ra, s32 simm16) { if (ra == 0) { SetGpr(rd, m_ir_builder->getInt64((s64)simm16)); } else { @@ -1500,7 +1482,7 @@ void PPULLVMRecompiler::ADDI(u32 rd, u32 ra, s32 simm16) { //InterpreterCall("ADDI", &PPUInterpreter::ADDI, rd, ra, simm16); } -void PPULLVMRecompiler::ADDIS(u32 rd, u32 ra, s32 simm16) { +void Compiler::ADDIS(u32 rd, u32 ra, s32 simm16) { if (ra == 0) { SetGpr(rd, m_ir_builder->getInt64((s64)simm16 << 16)); } else { @@ -1511,7 +1493,7 @@ void PPULLVMRecompiler::ADDIS(u32 rd, u32 ra, s32 simm16) { //InterpreterCall("ADDIS", &PPUInterpreter::ADDIS, rd, ra, simm16); } -void PPULLVMRecompiler::BC(u32 bo, u32 bi, s32 bd, u32 aa, u32 lk) { +void Compiler::BC(u32 bo, u32 bi, s32 bd, u32 aa, u32 lk) { auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_current_instruction_address, bd)); CreateBranch(CheckBranchCondition(bo, bi), target_i64, lk ? true : false); //m_hit_branch_instruction = true; @@ -1521,11 +1503,11 @@ void PPULLVMRecompiler::BC(u32 bo, u32 bi, s32 bd, u32 aa, u32 lk) { //m_ir_builder->CreateRetVoid(); } -void PPULLVMRecompiler::SC(u32 sc_code) { +void Compiler::SC(u32 sc_code) { InterpreterCall("SC", &PPUInterpreter::SC, sc_code); } -void PPULLVMRecompiler::B(s32 ll, u32 aa, u32 lk) { +void Compiler::B(s32 ll, u32 aa, u32 lk) { auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_current_instruction_address, ll)); CreateBranch(nullptr, target_i64, lk ? 
true : false); //m_hit_branch_instruction = true; @@ -1534,7 +1516,7 @@ void PPULLVMRecompiler::B(s32 ll, u32 aa, u32 lk) { //m_ir_builder->CreateRetVoid(); } -void PPULLVMRecompiler::MCRF(u32 crfd, u32 crfs) { +void Compiler::MCRF(u32 crfd, u32 crfs) { if (crfd != crfs) { auto cr_i32 = GetCr(); auto crf_i32 = GetNibble(cr_i32, crfs); @@ -1544,10 +1526,10 @@ void PPULLVMRecompiler::MCRF(u32 crfd, u32 crfs) { //InterpreterCall("MCRF", &PPUInterpreter::MCRF, crfd, crfs); } -void PPULLVMRecompiler::BCLR(u32 bo, u32 bi, u32 bh, u32 lk) { +void Compiler::BCLR(u32 bo, u32 bi, u32 bh, u32 lk) { auto lr_i64 = GetLr(); lr_i64 = m_ir_builder->CreateAnd(lr_i64, ~0x3ULL); - CreateBranch(CheckBranchCondition(bo, bi), lr_i64, lk ? true : false); + CreateBranch(CheckBranchCondition(bo, bi), lr_i64, lk ? true : false, true); //m_hit_branch_instruction = true; //SetPc(m_ir_builder->getInt32(m_current_instruction_address)); //InterpreterCall("BCLR", &PPUInterpreter::BCLR, bo, bi, bh, lk); @@ -1555,7 +1537,7 @@ void PPULLVMRecompiler::BCLR(u32 bo, u32 bi, u32 bh, u32 lk) { //m_ir_builder->CreateRetVoid(); } -void PPULLVMRecompiler::CRNOR(u32 crbd, u32 crba, u32 crbb) { +void Compiler::CRNOR(u32 crbd, u32 crba, u32 crbb) { auto cr_i32 = GetCr(); auto ba_i32 = GetBit(cr_i32, crba); auto bb_i32 = GetBit(cr_i32, crbb); @@ -1566,7 +1548,7 @@ void PPULLVMRecompiler::CRNOR(u32 crbd, u32 crba, u32 crbb) { //InterpreterCall("CRNOR", &PPUInterpreter::CRNOR, crbd, crba, crbb); } -void PPULLVMRecompiler::CRANDC(u32 crbd, u32 crba, u32 crbb) { +void Compiler::CRANDC(u32 crbd, u32 crba, u32 crbb) { auto cr_i32 = GetCr(); auto ba_i32 = GetBit(cr_i32, crba); auto bb_i32 = GetBit(cr_i32, crbb); @@ -1577,12 +1559,12 @@ void PPULLVMRecompiler::CRANDC(u32 crbd, u32 crba, u32 crbb) { //InterpreterCall("CRANDC", &PPUInterpreter::CRANDC, crbd, crba, crbb); } -void PPULLVMRecompiler::ISYNC() { +void Compiler::ISYNC() { m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, 
Intrinsic::x86_sse2_mfence)); //InterpreterCall("ISYNC", &PPUInterpreter::ISYNC); } -void PPULLVMRecompiler::CRXOR(u32 crbd, u32 crba, u32 crbb) { +void Compiler::CRXOR(u32 crbd, u32 crba, u32 crbb) { auto cr_i32 = GetCr(); auto ba_i32 = GetBit(cr_i32, crba); auto bb_i32 = GetBit(cr_i32, crbb); @@ -1592,7 +1574,7 @@ void PPULLVMRecompiler::CRXOR(u32 crbd, u32 crba, u32 crbb) { //InterpreterCall("CRXOR", &PPUInterpreter::CRXOR, crbd, crba, crbb); } -void PPULLVMRecompiler::CRNAND(u32 crbd, u32 crba, u32 crbb) { +void Compiler::CRNAND(u32 crbd, u32 crba, u32 crbb) { auto cr_i32 = GetCr(); auto ba_i32 = GetBit(cr_i32, crba); auto bb_i32 = GetBit(cr_i32, crbb); @@ -1603,7 +1585,7 @@ void PPULLVMRecompiler::CRNAND(u32 crbd, u32 crba, u32 crbb) { //InterpreterCall("CRNAND", &PPUInterpreter::CRNAND, crbd, crba, crbb); } -void PPULLVMRecompiler::CRAND(u32 crbd, u32 crba, u32 crbb) { +void Compiler::CRAND(u32 crbd, u32 crba, u32 crbb) { auto cr_i32 = GetCr(); auto ba_i32 = GetBit(cr_i32, crba); auto bb_i32 = GetBit(cr_i32, crbb); @@ -1613,7 +1595,7 @@ void PPULLVMRecompiler::CRAND(u32 crbd, u32 crba, u32 crbb) { //InterpreterCall("CRAND", &PPUInterpreter::CRAND, crbd, crba, crbb); } -void PPULLVMRecompiler::CREQV(u32 crbd, u32 crba, u32 crbb) { +void Compiler::CREQV(u32 crbd, u32 crba, u32 crbb) { auto cr_i32 = GetCr(); auto ba_i32 = GetBit(cr_i32, crba); auto bb_i32 = GetBit(cr_i32, crbb); @@ -1624,7 +1606,7 @@ void PPULLVMRecompiler::CREQV(u32 crbd, u32 crba, u32 crbb) { //InterpreterCall("CREQV", &PPUInterpreter::CREQV, crbd, crba, crbb); } -void PPULLVMRecompiler::CRORC(u32 crbd, u32 crba, u32 crbb) { +void Compiler::CRORC(u32 crbd, u32 crba, u32 crbb) { auto cr_i32 = GetCr(); auto ba_i32 = GetBit(cr_i32, crba); auto bb_i32 = GetBit(cr_i32, crbb); @@ -1635,7 +1617,7 @@ void PPULLVMRecompiler::CRORC(u32 crbd, u32 crba, u32 crbb) { //InterpreterCall("CRORC", &PPUInterpreter::CRORC, crbd, crba, crbb); } -void PPULLVMRecompiler::CROR(u32 crbd, u32 crba, u32 crbb) { +void 
Compiler::CROR(u32 crbd, u32 crba, u32 crbb) { auto cr_i32 = GetCr(); auto ba_i32 = GetBit(cr_i32, crba); auto bb_i32 = GetBit(cr_i32, crbb); @@ -1645,7 +1627,7 @@ void PPULLVMRecompiler::CROR(u32 crbd, u32 crba, u32 crbb) { //InterpreterCall("CROR", &PPUInterpreter::CROR, crbd, crba, crbb); } -void PPULLVMRecompiler::BCCTR(u32 bo, u32 bi, u32 bh, u32 lk) { +void Compiler::BCCTR(u32 bo, u32 bi, u32 bh, u32 lk) { auto ctr_i64 = GetCtr(); ctr_i64 = m_ir_builder->CreateAnd(ctr_i64, ~0x3ULL); CreateBranch(CheckBranchCondition(bo, bi), ctr_i64, lk ? true : false); @@ -1656,7 +1638,7 @@ void PPULLVMRecompiler::BCCTR(u32 bo, u32 bi, u32 bh, u32 lk) { //m_ir_builder->CreateRetVoid(); } -void PPULLVMRecompiler::RLWIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) { +void Compiler::RLWIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) { auto rs_i32 = GetGpr(rs, 32); auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); auto rsh_i64 = m_ir_builder->CreateShl(rs_i64, 32); @@ -1681,7 +1663,7 @@ void PPULLVMRecompiler::RLWIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) //InterpreterCall("RLWIMI", &PPUInterpreter::RLWIMI, ra, rs, sh, mb, me, rc); } -void PPULLVMRecompiler::RLWINM(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) { +void Compiler::RLWINM(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) { auto rs_i32 = GetGpr(rs, 32); auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); auto rsh_i64 = m_ir_builder->CreateShl(rs_i64, 32); @@ -1702,7 +1684,7 @@ void PPULLVMRecompiler::RLWINM(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) //InterpreterCall("RLWINM", &PPUInterpreter::RLWINM, ra, rs, sh, mb, me, rc); } -void PPULLVMRecompiler::RLWNM(u32 ra, u32 rs, u32 rb, u32 mb, u32 me, bool rc) { +void Compiler::RLWNM(u32 ra, u32 rs, u32 rb, u32 mb, u32 me, bool rc) { auto rs_i32 = GetGpr(rs, 32); auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); auto rsh_i64 = m_ir_builder->CreateShl(rs_i64, 32); @@ 
-1722,35 +1704,35 @@ void PPULLVMRecompiler::RLWNM(u32 ra, u32 rs, u32 rb, u32 mb, u32 me, bool rc) { //InterpreterCall("RLWNM", &PPUInterpreter::RLWNM, ra, rs, rb, mb, me, rc); } -void PPULLVMRecompiler::ORI(u32 ra, u32 rs, u32 uimm16) { +void Compiler::ORI(u32 ra, u32 rs, u32 uimm16) { auto rs_i64 = GetGpr(rs); auto res_i64 = m_ir_builder->CreateOr(rs_i64, uimm16); SetGpr(ra, res_i64); //InterpreterCall("ORI", &PPUInterpreter::ORI, ra, rs, uimm16); } -void PPULLVMRecompiler::ORIS(u32 ra, u32 rs, u32 uimm16) { +void Compiler::ORIS(u32 ra, u32 rs, u32 uimm16) { auto rs_i64 = GetGpr(rs); auto res_i64 = m_ir_builder->CreateOr(rs_i64, (u64)uimm16 << 16); SetGpr(ra, res_i64); //InterpreterCall("ORIS", &PPUInterpreter::ORIS, ra, rs, uimm16); } -void PPULLVMRecompiler::XORI(u32 ra, u32 rs, u32 uimm16) { +void Compiler::XORI(u32 ra, u32 rs, u32 uimm16) { auto rs_i64 = GetGpr(rs); auto res_i64 = m_ir_builder->CreateXor(rs_i64, uimm16); SetGpr(ra, res_i64); //InterpreterCall("XORI", &PPUInterpreter::XORI, ra, rs, uimm16); } -void PPULLVMRecompiler::XORIS(u32 ra, u32 rs, u32 uimm16) { +void Compiler::XORIS(u32 ra, u32 rs, u32 uimm16) { auto rs_i64 = GetGpr(rs); auto res_i64 = m_ir_builder->CreateXor(rs_i64, (u64)uimm16 << 16); SetGpr(ra, res_i64); //InterpreterCall("XORIS", &PPUInterpreter::XORIS, ra, rs, uimm16); } -void PPULLVMRecompiler::ANDI_(u32 ra, u32 rs, u32 uimm16) { +void Compiler::ANDI_(u32 ra, u32 rs, u32 uimm16) { auto rs_i64 = GetGpr(rs); auto res_i64 = m_ir_builder->CreateAnd(rs_i64, uimm16); SetGpr(ra, res_i64); @@ -1758,7 +1740,7 @@ void PPULLVMRecompiler::ANDI_(u32 ra, u32 rs, u32 uimm16) { //InterpreterCall("ANDI_", &PPUInterpreter::ANDI_, ra, rs, uimm16); } -void PPULLVMRecompiler::ANDIS_(u32 ra, u32 rs, u32 uimm16) { +void Compiler::ANDIS_(u32 ra, u32 rs, u32 uimm16) { auto rs_i64 = GetGpr(rs); auto res_i64 = m_ir_builder->CreateAnd(rs_i64, (u64)uimm16 << 16); SetGpr(ra, res_i64); @@ -1766,7 +1748,7 @@ void PPULLVMRecompiler::ANDIS_(u32 ra, u32 rs, u32 
uimm16) { //InterpreterCall("ANDIS_", &PPUInterpreter::ANDIS_, ra, rs, uimm16); } -void PPULLVMRecompiler::RLDICL(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { +void Compiler::RLDICL(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { auto rs_i64 = GetGpr(rs); auto res_i64 = rs_i64; if (sh) { @@ -1784,7 +1766,7 @@ void PPULLVMRecompiler::RLDICL(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { //InterpreterCall("RLDICL", &PPUInterpreter::RLDICL, ra, rs, sh, mb, rc); } -void PPULLVMRecompiler::RLDICR(u32 ra, u32 rs, u32 sh, u32 me, bool rc) { +void Compiler::RLDICR(u32 ra, u32 rs, u32 sh, u32 me, bool rc) { auto rs_i64 = GetGpr(rs); auto res_i64 = rs_i64; if (sh) { @@ -1802,7 +1784,7 @@ void PPULLVMRecompiler::RLDICR(u32 ra, u32 rs, u32 sh, u32 me, bool rc) { //InterpreterCall("RLDICR", &PPUInterpreter::RLDICR, ra, rs, sh, me, rc); } -void PPULLVMRecompiler::RLDIC(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { +void Compiler::RLDIC(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { auto rs_i64 = GetGpr(rs); auto res_i64 = rs_i64; if (sh) { @@ -1820,7 +1802,7 @@ void PPULLVMRecompiler::RLDIC(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { //InterpreterCall("RLDIC", &PPUInterpreter::RLDIC, ra, rs, sh, mb, rc); } -void PPULLVMRecompiler::RLDIMI(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { +void Compiler::RLDIMI(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { auto rs_i64 = GetGpr(rs); auto ra_i64 = GetGpr(ra); auto res_i64 = rs_i64; @@ -1842,7 +1824,7 @@ void PPULLVMRecompiler::RLDIMI(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { //InterpreterCall("RLDIMI", &PPUInterpreter::RLDIMI, ra, rs, sh, mb, rc); } -void PPULLVMRecompiler::RLDC_LR(u32 ra, u32 rs, u32 rb, u32 m_eb, bool is_r, bool rc) { +void Compiler::RLDC_LR(u32 ra, u32 rs, u32 rb, u32 m_eb, bool is_r, bool rc) { auto rs_i64 = GetGpr(rs); auto rb_i64 = GetGpr(rb); auto shl_i64 = m_ir_builder->CreateAnd(rb_i64, 0x3F); @@ -1865,7 +1847,7 @@ void PPULLVMRecompiler::RLDC_LR(u32 ra, u32 rs, u32 rb, u32 m_eb, bool is_r, boo //InterpreterCall("RLDC_LR", 
&PPUInterpreter::RLDC_LR, ra, rs, rb, m_eb, is_r, rc); } -void PPULLVMRecompiler::CMP(u32 crfd, u32 l, u32 ra, u32 rb) { +void Compiler::CMP(u32 crfd, u32 l, u32 ra, u32 rb) { Value * ra_i64; Value * rb_i64; if (l == 0) { @@ -1880,11 +1862,11 @@ void PPULLVMRecompiler::CMP(u32 crfd, u32 l, u32 ra, u32 rb) { //InterpreterCall("CMP", &PPUInterpreter::CMP, crfd, l, ra, rb); } -void PPULLVMRecompiler::TW(u32 to, u32 ra, u32 rb) { +void Compiler::TW(u32 to, u32 ra, u32 rb) { InterpreterCall("TW", &PPUInterpreter::TW, to, ra, rb); } -void PPULLVMRecompiler::LVSL(u32 vd, u32 ra, u32 rb) { +void Compiler::LVSL(u32 vd, u32 ra, u32 rb) { static const u128 s_lvsl_values[] = { {0x08090A0B0C0D0E0F, 0x0001020304050607}, {0x090A0B0C0D0E0F10, 0x0102030405060708}, @@ -1919,7 +1901,7 @@ void PPULLVMRecompiler::LVSL(u32 vd, u32 ra, u32 rb) { //InterpreterCall("LVSL", &PPUInterpreter::LVSL, vd, ra, rb); } -void PPULLVMRecompiler::LVEBX(u32 vd, u32 ra, u32 rb) { +void Compiler::LVEBX(u32 vd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -1936,7 +1918,7 @@ void PPULLVMRecompiler::LVEBX(u32 vd, u32 ra, u32 rb) { //InterpreterCall("LVEBX", &PPUInterpreter::LVEBX, vd, ra, rb); } -void PPULLVMRecompiler::SUBFC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { +void Compiler::SUBFC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { auto ra_i64 = GetGpr(ra); ra_i64 = m_ir_builder->CreateNeg(ra_i64); auto rb_i64 = GetGpr(rb); @@ -1956,7 +1938,7 @@ void PPULLVMRecompiler::SUBFC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { //InterpreterCall("SUBFC", &PPUInterpreter::SUBFC, rd, ra, rb, oe, rc); } -void PPULLVMRecompiler::ADDC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { +void Compiler::ADDC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { auto ra_i64 = GetGpr(ra); auto rb_i64 = GetGpr(rb); auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, rb_i64); @@ -1975,7 +1957,7 @@ void 
PPULLVMRecompiler::ADDC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { //InterpreterCall("ADDC", &PPUInterpreter::ADDC, rd, ra, rb, oe, rc); } -void PPULLVMRecompiler::MULHDU(u32 rd, u32 ra, u32 rb, bool rc) { +void Compiler::MULHDU(u32 rd, u32 ra, u32 rb, bool rc) { auto ra_i64 = GetGpr(ra); auto rb_i64 = GetGpr(rb); auto ra_i128 = m_ir_builder->CreateZExt(ra_i64, m_ir_builder->getIntNTy(128)); @@ -1992,7 +1974,7 @@ void PPULLVMRecompiler::MULHDU(u32 rd, u32 ra, u32 rb, bool rc) { //InterpreterCall("MULHDU", &PPUInterpreter::MULHDU, rd, ra, rb, rc); } -void PPULLVMRecompiler::MULHWU(u32 rd, u32 ra, u32 rb, bool rc) { +void Compiler::MULHWU(u32 rd, u32 ra, u32 rb, bool rc) { auto ra_i32 = GetGpr(ra, 32); auto rb_i32 = GetGpr(rb, 32); auto ra_i64 = m_ir_builder->CreateZExt(ra_i32, m_ir_builder->getInt64Ty()); @@ -2007,27 +1989,27 @@ void PPULLVMRecompiler::MULHWU(u32 rd, u32 ra, u32 rb, bool rc) { //InterpreterCall("MULHWU", &PPUInterpreter::MULHWU, rd, ra, rb, rc); } -void PPULLVMRecompiler::MFOCRF(u32 a, u32 rd, u32 crm) { +void Compiler::MFOCRF(u32 a, u32 rd, u32 crm) { auto cr_i32 = GetCr(); auto cr_i64 = m_ir_builder->CreateZExt(cr_i32, m_ir_builder->getInt64Ty()); SetGpr(rd, cr_i64); //InterpreterCall("MFOCRF", &PPUInterpreter::MFOCRF, a, rd, crm); } -void PPULLVMRecompiler::LWARX(u32 rd, u32 ra, u32 rb) { +void Compiler::LWARX(u32 rd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); } - auto resv_addr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, R_ADDR)); + auto resv_addr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, R_ADDR)); auto resv_addr_i64_ptr = m_ir_builder->CreateBitCast(resv_addr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(addr_i64, resv_addr_i64_ptr, 8); auto resv_val_i32 = ReadMemory(addr_i64, 32, 4, false, false); auto resv_val_i64 
= m_ir_builder->CreateZExt(resv_val_i32, m_ir_builder->getInt64Ty()); - auto resv_val_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, R_VALUE)); + auto resv_val_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, R_VALUE)); auto resv_val_i64_ptr = m_ir_builder->CreateBitCast(resv_val_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(resv_val_i64, resv_val_i64_ptr, 8); @@ -2037,7 +2019,7 @@ void PPULLVMRecompiler::LWARX(u32 rd, u32 ra, u32 rb) { //InterpreterCall("LWARX", &PPUInterpreter::LWARX, rd, ra, rb); } -void PPULLVMRecompiler::LDX(u32 rd, u32 ra, u32 rb) { +void Compiler::LDX(u32 rd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2049,7 +2031,7 @@ void PPULLVMRecompiler::LDX(u32 rd, u32 ra, u32 rb) { //InterpreterCall("LDX", &PPUInterpreter::LDX, rd, ra, rb); } -void PPULLVMRecompiler::LWZX(u32 rd, u32 ra, u32 rb) { +void Compiler::LWZX(u32 rd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2062,7 +2044,7 @@ void PPULLVMRecompiler::LWZX(u32 rd, u32 ra, u32 rb) { //InterpreterCall("LWZX", &PPUInterpreter::LWZX, rd, ra, rb); } -void PPULLVMRecompiler::SLW(u32 ra, u32 rs, u32 rb, bool rc) { +void Compiler::SLW(u32 ra, u32 rs, u32 rb, bool rc) { auto rs_i32 = GetGpr(rs, 32); auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); auto rb_i8 = GetGpr(rb, 8); @@ -2080,7 +2062,7 @@ void PPULLVMRecompiler::SLW(u32 ra, u32 rs, u32 rb, bool rc) { //InterpreterCall("SLW", &PPUInterpreter::SLW, ra, rs, rb, rc); } -void PPULLVMRecompiler::CNTLZW(u32 ra, u32 rs, bool rc) { +void Compiler::CNTLZW(u32 ra, u32 rs, bool rc) { auto rs_i32 = GetGpr(rs, 32); auto res_i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::ctlz, m_ir_builder->getInt32Ty()), rs_i32, m_ir_builder->getInt1(false)); auto res_i64 = 
m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); @@ -2093,7 +2075,7 @@ void PPULLVMRecompiler::CNTLZW(u32 ra, u32 rs, bool rc) { //InterpreterCall("CNTLZW", &PPUInterpreter::CNTLZW, ra, rs, rc); } -void PPULLVMRecompiler::SLD(u32 ra, u32 rs, u32 rb, bool rc) { +void Compiler::SLD(u32 ra, u32 rs, u32 rb, bool rc) { auto rs_i64 = GetGpr(rs); auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128)); auto rb_i8 = GetGpr(rb, 8); @@ -2110,7 +2092,7 @@ void PPULLVMRecompiler::SLD(u32 ra, u32 rs, u32 rb, bool rc) { //InterpreterCall("SLD", &PPUInterpreter::SLD, ra, rs, rb, rc); } -void PPULLVMRecompiler::AND(u32 ra, u32 rs, u32 rb, bool rc) { +void Compiler::AND(u32 ra, u32 rs, u32 rb, bool rc) { auto rs_i64 = GetGpr(rs); auto rb_i64 = GetGpr(rb); auto res_i64 = m_ir_builder->CreateAnd(rs_i64, rb_i64); @@ -2122,7 +2104,7 @@ void PPULLVMRecompiler::AND(u32 ra, u32 rs, u32 rb, bool rc) { //InterpreterCall("AND", &PPUInterpreter::AND, ra, rs, rb, rc); } -void PPULLVMRecompiler::CMPL(u32 crfd, u32 l, u32 ra, u32 rb) { +void Compiler::CMPL(u32 crfd, u32 l, u32 ra, u32 rb) { Value * ra_i64; Value * rb_i64; if (l == 0) { @@ -2137,7 +2119,7 @@ void PPULLVMRecompiler::CMPL(u32 crfd, u32 l, u32 ra, u32 rb) { //InterpreterCall("CMPL", &PPUInterpreter::CMPL, crfd, l, ra, rb); } -void PPULLVMRecompiler::LVSR(u32 vd, u32 ra, u32 rb) { +void Compiler::LVSR(u32 vd, u32 ra, u32 rb) { static const u128 s_lvsr_values[] = { {0x18191A1B1C1D1E1F, 0x1011121314151617}, {0x1718191A1B1C1D1E, 0x0F10111213141516}, @@ -2172,7 +2154,7 @@ void PPULLVMRecompiler::LVSR(u32 vd, u32 ra, u32 rb) { //InterpreterCall("LVSR", &PPUInterpreter::LVSR, vd, ra, rb); } -void PPULLVMRecompiler::LVEHX(u32 vd, u32 ra, u32 rb) { +void Compiler::LVEHX(u32 vd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2191,7 +2173,7 @@ void PPULLVMRecompiler::LVEHX(u32 vd, u32 ra, u32 rb) { //InterpreterCall("LVEHX", &PPUInterpreter::LVEHX, vd, ra, rb); } -void 
PPULLVMRecompiler::SUBF(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { +void Compiler::SUBF(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { auto ra_i64 = GetGpr(ra); auto rb_i64 = GetGpr(rb); auto diff_i64 = m_ir_builder->CreateSub(rb_i64, ra_i64); @@ -2207,7 +2189,7 @@ void PPULLVMRecompiler::SUBF(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { //InterpreterCall("SUBF", &PPUInterpreter::SUBF, rd, ra, rb, oe, rc); } -void PPULLVMRecompiler::LDUX(u32 rd, u32 ra, u32 rb) { +void Compiler::LDUX(u32 rd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -2218,12 +2200,12 @@ void PPULLVMRecompiler::LDUX(u32 rd, u32 ra, u32 rb) { //InterpreterCall("LDUX", &PPUInterpreter::LDUX, rd, ra, rb); } -void PPULLVMRecompiler::DCBST(u32 ra, u32 rb) { +void Compiler::DCBST(u32 ra, u32 rb) { // TODO: Implement this //InterpreterCall("DCBST", &PPUInterpreter::DCBST, ra, rb); } -void PPULLVMRecompiler::LWZUX(u32 rd, u32 ra, u32 rb) { +void Compiler::LWZUX(u32 rd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -2235,7 +2217,7 @@ void PPULLVMRecompiler::LWZUX(u32 rd, u32 ra, u32 rb) { //InterpreterCall("LWZUX", &PPUInterpreter::LWZUX, rd, ra, rb); } -void PPULLVMRecompiler::CNTLZD(u32 ra, u32 rs, bool rc) { +void Compiler::CNTLZD(u32 ra, u32 rs, bool rc) { auto rs_i64 = GetGpr(rs); auto res_i64 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::ctlz, m_ir_builder->getInt64Ty()), rs_i64, m_ir_builder->getInt1(false)); SetGpr(ra, res_i64); @@ -2247,15 +2229,15 @@ void PPULLVMRecompiler::CNTLZD(u32 ra, u32 rs, bool rc) { //InterpreterCall("CNTLZD", &PPUInterpreter::CNTLZD, ra, rs, rc); } -void PPULLVMRecompiler::ANDC(u32 ra, u32 rs, u32 rb, bool rc) { +void Compiler::ANDC(u32 ra, u32 rs, u32 rb, bool rc) { InterpreterCall("ANDC", &PPUInterpreter::ANDC, ra, rs, rb, rc); } -void PPULLVMRecompiler::TD(u32 to, u32 ra, u32 rb) 
{ +void Compiler::TD(u32 to, u32 ra, u32 rb) { InterpreterCall("TD", &PPUInterpreter::TD, to, ra, rb); } -void PPULLVMRecompiler::LVEWX(u32 vd, u32 ra, u32 rb) { +void Compiler::LVEWX(u32 vd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2274,7 +2256,7 @@ void PPULLVMRecompiler::LVEWX(u32 vd, u32 ra, u32 rb) { //InterpreterCall("LVEWX", &PPUInterpreter::LVEWX, vd, ra, rb); } -void PPULLVMRecompiler::MULHD(u32 rd, u32 ra, u32 rb, bool rc) { +void Compiler::MULHD(u32 rd, u32 ra, u32 rb, bool rc) { auto ra_i64 = GetGpr(ra); auto rb_i64 = GetGpr(rb); auto ra_i128 = m_ir_builder->CreateSExt(ra_i64, m_ir_builder->getIntNTy(128)); @@ -2291,7 +2273,7 @@ void PPULLVMRecompiler::MULHD(u32 rd, u32 ra, u32 rb, bool rc) { //InterpreterCall("MULHD", &PPUInterpreter::MULHD, rd, ra, rb, rc); } -void PPULLVMRecompiler::MULHW(u32 rd, u32 ra, u32 rb, bool rc) { +void Compiler::MULHW(u32 rd, u32 ra, u32 rb, bool rc) { auto ra_i32 = GetGpr(ra, 32); auto rb_i32 = GetGpr(rb, 32); auto ra_i64 = m_ir_builder->CreateSExt(ra_i32, m_ir_builder->getInt64Ty()); @@ -2306,19 +2288,19 @@ void PPULLVMRecompiler::MULHW(u32 rd, u32 ra, u32 rb, bool rc) { //InterpreterCall("MULHW", &PPUInterpreter::MULHW, rd, ra, rb, rc); } -void PPULLVMRecompiler::LDARX(u32 rd, u32 ra, u32 rb) { +void Compiler::LDARX(u32 rd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); } - auto resv_addr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, R_ADDR)); + auto resv_addr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, R_ADDR)); auto resv_addr_i64_ptr = m_ir_builder->CreateBitCast(resv_addr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(addr_i64, resv_addr_i64_ptr, 8); auto resv_val_i64 = ReadMemory(addr_i64, 64, 8, false); - auto resv_val_i8_ptr = 
m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, R_VALUE)); + auto resv_val_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, R_VALUE)); auto resv_val_i64_ptr = m_ir_builder->CreateBitCast(resv_val_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(resv_val_i64, resv_val_i64_ptr, 8); @@ -2327,12 +2309,12 @@ void PPULLVMRecompiler::LDARX(u32 rd, u32 ra, u32 rb) { //InterpreterCall("LDARX", &PPUInterpreter::LDARX, rd, ra, rb); } -void PPULLVMRecompiler::DCBF(u32 ra, u32 rb) { +void Compiler::DCBF(u32 ra, u32 rb) { // TODO: Implement this //InterpreterCall("DCBF", &PPUInterpreter::DCBF, ra, rb); } -void PPULLVMRecompiler::LBZX(u32 rd, u32 ra, u32 rb) { +void Compiler::LBZX(u32 rd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2345,7 +2327,7 @@ void PPULLVMRecompiler::LBZX(u32 rd, u32 ra, u32 rb) { //InterpreterCall("LBZX", &PPUInterpreter::LBZX, rd, ra, rb); } -void PPULLVMRecompiler::LVX(u32 vd, u32 ra, u32 rb) { +void Compiler::LVX(u32 vd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2358,7 +2340,7 @@ void PPULLVMRecompiler::LVX(u32 vd, u32 ra, u32 rb) { //InterpreterCall("LVX", &PPUInterpreter::LVX, vd, ra, rb); } -void PPULLVMRecompiler::NEG(u32 rd, u32 ra, u32 oe, bool rc) { +void Compiler::NEG(u32 rd, u32 ra, u32 oe, bool rc) { auto ra_i64 = GetGpr(ra); auto diff_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(0), ra_i64); SetGpr(rd, diff_i64); @@ -2373,7 +2355,7 @@ void PPULLVMRecompiler::NEG(u32 rd, u32 ra, u32 oe, bool rc) { //InterpreterCall("NEG", &PPUInterpreter::NEG, rd, ra, oe, rc); } -void PPULLVMRecompiler::LBZUX(u32 rd, u32 ra, u32 rb) { +void Compiler::LBZUX(u32 rd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -2385,7 +2367,7 @@ void PPULLVMRecompiler::LBZUX(u32 rd, u32 ra, 
u32 rb) { //InterpreterCall("LBZUX", &PPUInterpreter::LBZUX, rd, ra, rb); } -void PPULLVMRecompiler::NOR(u32 ra, u32 rs, u32 rb, bool rc) { +void Compiler::NOR(u32 ra, u32 rs, u32 rb, bool rc) { auto rs_i64 = GetGpr(rs); auto rb_i64 = GetGpr(rb); auto res_i64 = m_ir_builder->CreateOr(rs_i64, rb_i64); @@ -2398,7 +2380,7 @@ void PPULLVMRecompiler::NOR(u32 ra, u32 rs, u32 rb, bool rc) { //InterpreterCall("NOR", &PPUInterpreter::NOR, ra, rs, rb, rc); } -void PPULLVMRecompiler::STVEBX(u32 vs, u32 ra, u32 rb) { +void Compiler::STVEBX(u32 vs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2413,15 +2395,15 @@ void PPULLVMRecompiler::STVEBX(u32 vs, u32 ra, u32 rb) { //InterpreterCall("STVEBX", &PPUInterpreter::STVEBX, vs, ra, rb); } -void PPULLVMRecompiler::SUBFE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { +void Compiler::SUBFE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { InterpreterCall("SUBFE", &PPUInterpreter::SUBFE, rd, ra, rb, oe, rc); } -void PPULLVMRecompiler::ADDE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { +void Compiler::ADDE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { InterpreterCall("ADDE", &PPUInterpreter::ADDE, rd, ra, rb, oe, rc); } -void PPULLVMRecompiler::MTOCRF(u32 l, u32 crm, u32 rs) { +void Compiler::MTOCRF(u32 l, u32 crm, u32 rs) { auto rs_i32 = GetGpr(rs, 32); auto cr_i32 = GetCr(); u32 mask = 0; @@ -2442,7 +2424,7 @@ void PPULLVMRecompiler::MTOCRF(u32 l, u32 crm, u32 rs) { //InterpreterCall("MTOCRF", &PPUInterpreter::MTOCRF, l, crm, rs); } -void PPULLVMRecompiler::STDX(u32 rs, u32 ra, u32 rb) { +void Compiler::STDX(u32 rs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2453,11 +2435,11 @@ void PPULLVMRecompiler::STDX(u32 rs, u32 ra, u32 rb) { //InterpreterCall("STDX", &PPUInterpreter::STDX, rs, ra, rb); } -void PPULLVMRecompiler::STWCX_(u32 rs, u32 ra, u32 rb) { +void Compiler::STWCX_(u32 rs, u32 ra, u32 rb) { InterpreterCall("STWCX_", &PPUInterpreter::STWCX_, rs, ra, rb); } 
-void PPULLVMRecompiler::STWX(u32 rs, u32 ra, u32 rb) { +void Compiler::STWX(u32 rs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2468,7 +2450,7 @@ void PPULLVMRecompiler::STWX(u32 rs, u32 ra, u32 rb) { //InterpreterCall("STWX", &PPUInterpreter::STWX, rs, ra, rb); } -void PPULLVMRecompiler::STVEHX(u32 vs, u32 ra, u32 rb) { +void Compiler::STVEHX(u32 vs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2485,7 +2467,7 @@ void PPULLVMRecompiler::STVEHX(u32 vs, u32 ra, u32 rb) { //InterpreterCall("STVEHX", &PPUInterpreter::STVEHX, vs, ra, rb); } -void PPULLVMRecompiler::STDUX(u32 rs, u32 ra, u32 rb) { +void Compiler::STDUX(u32 rs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -2495,7 +2477,7 @@ void PPULLVMRecompiler::STDUX(u32 rs, u32 ra, u32 rb) { //InterpreterCall("STDUX", &PPUInterpreter::STDUX, rs, ra, rb); } -void PPULLVMRecompiler::STWUX(u32 rs, u32 ra, u32 rb) { +void Compiler::STWUX(u32 rs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -2505,7 +2487,7 @@ void PPULLVMRecompiler::STWUX(u32 rs, u32 ra, u32 rb) { //InterpreterCall("STWUX", &PPUInterpreter::STWUX, rs, ra, rb); } -void PPULLVMRecompiler::STVEWX(u32 vs, u32 ra, u32 rb) { +void Compiler::STVEWX(u32 vs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2522,7 +2504,7 @@ void PPULLVMRecompiler::STVEWX(u32 vs, u32 ra, u32 rb) { //InterpreterCall("STVEWX", &PPUInterpreter::STVEWX, vs, ra, rb); } -void PPULLVMRecompiler::ADDZE(u32 rd, u32 ra, u32 oe, bool rc) { +void Compiler::ADDZE(u32 rd, u32 ra, u32 oe, bool rc) { auto ra_i64 = GetGpr(ra); auto ca_i64 = GetXerCa(); auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); @@ -2537,15 
+2519,15 @@ void PPULLVMRecompiler::ADDZE(u32 rd, u32 ra, u32 oe, bool rc) { //InterpreterCall("ADDZE", &PPUInterpreter::ADDZE, rd, ra, oe, rc); } -void PPULLVMRecompiler::SUBFZE(u32 rd, u32 ra, u32 oe, bool rc) { +void Compiler::SUBFZE(u32 rd, u32 ra, u32 oe, bool rc) { InterpreterCall("SUBFZE", &PPUInterpreter::SUBFZE, rd, ra, oe, rc); } -void PPULLVMRecompiler::STDCX_(u32 rs, u32 ra, u32 rb) { +void Compiler::STDCX_(u32 rs, u32 ra, u32 rb) { InterpreterCall("STDCX_", &PPUInterpreter::STDCX_, rs, ra, rb); } -void PPULLVMRecompiler::STBX(u32 rs, u32 ra, u32 rb) { +void Compiler::STBX(u32 rs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2556,7 +2538,7 @@ void PPULLVMRecompiler::STBX(u32 rs, u32 ra, u32 rb) { //InterpreterCall("STBX", &PPUInterpreter::STBX, rs, ra, rb); } -void PPULLVMRecompiler::STVX(u32 vs, u32 ra, u32 rb) { +void Compiler::STVX(u32 vs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2568,11 +2550,11 @@ void PPULLVMRecompiler::STVX(u32 vs, u32 ra, u32 rb) { //InterpreterCall("STVX", &PPUInterpreter::STVX, vs, ra, rb); } -void PPULLVMRecompiler::SUBFME(u32 rd, u32 ra, u32 oe, bool rc) { +void Compiler::SUBFME(u32 rd, u32 ra, u32 oe, bool rc) { InterpreterCall("SUBFME", &PPUInterpreter::SUBFME, rd, ra, oe, rc); } -void PPULLVMRecompiler::MULLD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { +void Compiler::MULLD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { auto ra_i64 = GetGpr(ra); auto rb_i64 = GetGpr(rb); auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64); @@ -2586,11 +2568,11 @@ void PPULLVMRecompiler::MULLD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { //InterpreterCall("MULLD", &PPUInterpreter::MULLD, rd, ra, rb, oe, rc); } -void PPULLVMRecompiler::ADDME(u32 rd, u32 ra, u32 oe, bool rc) { +void Compiler::ADDME(u32 rd, u32 ra, u32 oe, bool rc) { InterpreterCall("ADDME", &PPUInterpreter::ADDME, rd, ra, oe, rc); } -void PPULLVMRecompiler::MULLW(u32 rd, u32 ra, u32 rb, u32 
oe, bool rc) { +void Compiler::MULLW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { auto ra_i32 = GetGpr(ra, 32); auto rb_i32 = GetGpr(rb, 32); auto ra_i64 = m_ir_builder->CreateSExt(ra_i32, m_ir_builder->getInt64Ty()); @@ -2606,12 +2588,12 @@ void PPULLVMRecompiler::MULLW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { //InterpreterCall("MULLW", &PPUInterpreter::MULLW, rd, ra, rb, oe, rc); } -void PPULLVMRecompiler::DCBTST(u32 ra, u32 rb, u32 th) { +void Compiler::DCBTST(u32 ra, u32 rb, u32 th) { // TODO: Implement this //InterpreterCall("DCBTST", &PPUInterpreter::DCBTST, ra, rb, th); } -void PPULLVMRecompiler::STBUX(u32 rs, u32 ra, u32 rb) { +void Compiler::STBUX(u32 rs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -2621,7 +2603,7 @@ void PPULLVMRecompiler::STBUX(u32 rs, u32 ra, u32 rb) { //InterpreterCall("STBUX", &PPUInterpreter::STBUX, rs, ra, rb); } -void PPULLVMRecompiler::ADD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { +void Compiler::ADD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { auto ra_i64 = GetGpr(ra); auto rb_i64 = GetGpr(rb); auto sum_i64 = m_ir_builder->CreateAdd(ra_i64, rb_i64); @@ -2637,12 +2619,12 @@ void PPULLVMRecompiler::ADD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { //InterpreterCall("ADD", &PPUInterpreter::ADD, rd, ra, rb, oe, rc); } -void PPULLVMRecompiler::DCBT(u32 ra, u32 rb, u32 th) { +void Compiler::DCBT(u32 ra, u32 rb, u32 th) { // TODO: Implement this using prefetch //InterpreterCall("DCBT", &PPUInterpreter::DCBT, ra, rb, th); } -void PPULLVMRecompiler::LHZX(u32 rd, u32 ra, u32 rb) { +void Compiler::LHZX(u32 rd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2655,15 +2637,15 @@ void PPULLVMRecompiler::LHZX(u32 rd, u32 ra, u32 rb) { //InterpreterCall("LHZX", &PPUInterpreter::LHZX, rd, ra, rb); } -void PPULLVMRecompiler::EQV(u32 ra, u32 rs, u32 rb, bool rc) { +void Compiler::EQV(u32 ra, u32 rs, u32 rb, bool rc) { 
InterpreterCall("EQV", &PPUInterpreter::EQV, ra, rs, rb, rc); } -void PPULLVMRecompiler::ECIWX(u32 rd, u32 ra, u32 rb) { +void Compiler::ECIWX(u32 rd, u32 ra, u32 rb) { InterpreterCall("ECIWX", &PPUInterpreter::ECIWX, rd, ra, rb); } -void PPULLVMRecompiler::LHZUX(u32 rd, u32 ra, u32 rb) { +void Compiler::LHZUX(u32 rd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -2675,7 +2657,7 @@ void PPULLVMRecompiler::LHZUX(u32 rd, u32 ra, u32 rb) { //InterpreterCall("LHZUX", &PPUInterpreter::LHZUX, rd, ra, rb); } -void PPULLVMRecompiler::XOR(u32 ra, u32 rs, u32 rb, bool rc) { +void Compiler::XOR(u32 ra, u32 rs, u32 rb, bool rc) { auto rs_i64 = GetGpr(rs); auto rb_i64 = GetGpr(rb); auto res_i64 = m_ir_builder->CreateXor(rs_i64, rb_i64); @@ -2687,7 +2669,7 @@ void PPULLVMRecompiler::XOR(u32 ra, u32 rs, u32 rb, bool rc) { //InterpreterCall("XOR", &PPUInterpreter::XOR, ra, rs, rb, rc); } -void PPULLVMRecompiler::MFSPR(u32 rd, u32 spr) { +void Compiler::MFSPR(u32 rd, u32 spr) { Value * rd_i64; auto n = (spr >> 5) | ((spr & 0x1f) << 5); @@ -2713,7 +2695,7 @@ void PPULLVMRecompiler::MFSPR(u32 rd, u32 spr) { //InterpreterCall("MFSPR", &PPUInterpreter::MFSPR, rd, spr); } -void PPULLVMRecompiler::LWAX(u32 rd, u32 ra, u32 rb) { +void Compiler::LWAX(u32 rd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2726,11 +2708,11 @@ void PPULLVMRecompiler::LWAX(u32 rd, u32 ra, u32 rb) { //InterpreterCall("LWAX", &PPUInterpreter::LWAX, rd, ra, rb); } -void PPULLVMRecompiler::DST(u32 ra, u32 rb, u32 strm, u32 t) { +void Compiler::DST(u32 ra, u32 rb, u32 strm, u32 t) { InterpreterCall("DST", &PPUInterpreter::DST, ra, rb, strm, t); } -void PPULLVMRecompiler::LHAX(u32 rd, u32 ra, u32 rb) { +void Compiler::LHAX(u32 rd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2743,21 +2725,13 @@ void PPULLVMRecompiler::LHAX(u32 rd, u32 ra, u32 rb) { 
//InterpreterCall("LHAX", &PPUInterpreter::LHAX, rd, ra, rb); } -void PPULLVMRecompiler::LVXL(u32 vd, u32 ra, u32 rb) { +void Compiler::LVXL(u32 vd, u32 ra, u32 rb) { LVX(vd, ra, rb); //InterpreterCall("LVXL", &PPUInterpreter::LVXL, vd, ra, rb); } -void PPULLVMRecompiler::MFTB(u32 rd, u32 spr) { - static Function * s_get_time_fn = nullptr; - - if (s_get_time_fn == nullptr) { - s_get_time_fn = (Function *)m_module->getOrInsertFunction("get_time", m_ir_builder->getInt64Ty(), nullptr); - s_get_time_fn->setCallingConv(CallingConv::X86_64_Win64); - m_execution_engine->addGlobalMapping(s_get_time_fn, (void *)get_time); - } - - auto tb_i64 = (Value *)m_ir_builder->CreateCall(s_get_time_fn); +void Compiler::MFTB(u32 rd, u32 spr) { + auto tb_i64 = Call("get_time", get_time); u32 n = (spr >> 5) | ((spr & 0x1f) << 5); if (n == 0x10D) { @@ -2767,7 +2741,7 @@ void PPULLVMRecompiler::MFTB(u32 rd, u32 spr) { SetGpr(rd, tb_i64); } -void PPULLVMRecompiler::LWAUX(u32 rd, u32 ra, u32 rb) { +void Compiler::LWAUX(u32 rd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -2779,11 +2753,11 @@ void PPULLVMRecompiler::LWAUX(u32 rd, u32 ra, u32 rb) { //InterpreterCall("LWAUX", &PPUInterpreter::LWAUX, rd, ra, rb); } -void PPULLVMRecompiler::DSTST(u32 ra, u32 rb, u32 strm, u32 t) { +void Compiler::DSTST(u32 ra, u32 rb, u32 strm, u32 t) { InterpreterCall("DSTST", &PPUInterpreter::DSTST, ra, rb, strm, t); } -void PPULLVMRecompiler::LHAUX(u32 rd, u32 ra, u32 rb) { +void Compiler::LHAUX(u32 rd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -2795,7 +2769,7 @@ void PPULLVMRecompiler::LHAUX(u32 rd, u32 ra, u32 rb) { //InterpreterCall("LHAUX", &PPUInterpreter::LHAUX, rd, ra, rb); } -void PPULLVMRecompiler::STHX(u32 rs, u32 ra, u32 rb) { +void Compiler::STHX(u32 rs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = 
GetGpr(ra); @@ -2806,15 +2780,15 @@ void PPULLVMRecompiler::STHX(u32 rs, u32 ra, u32 rb) { //InterpreterCall("STHX", &PPUInterpreter::STHX, rs, ra, rb); } -void PPULLVMRecompiler::ORC(u32 ra, u32 rs, u32 rb, bool rc) { +void Compiler::ORC(u32 ra, u32 rs, u32 rb, bool rc) { InterpreterCall("ORC", &PPUInterpreter::ORC, ra, rs, rb, rc); } -void PPULLVMRecompiler::ECOWX(u32 rs, u32 ra, u32 rb) { +void Compiler::ECOWX(u32 rs, u32 ra, u32 rb) { InterpreterCall("ECOWX", &PPUInterpreter::ECOWX, rs, ra, rb); } -void PPULLVMRecompiler::STHUX(u32 rs, u32 ra, u32 rb) { +void Compiler::STHUX(u32 rs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -2824,7 +2798,7 @@ void PPULLVMRecompiler::STHUX(u32 rs, u32 ra, u32 rb) { //InterpreterCall("STHUX", &PPUInterpreter::STHUX, rs, ra, rb); } -void PPULLVMRecompiler::OR(u32 ra, u32 rs, u32 rb, bool rc) { +void Compiler::OR(u32 ra, u32 rs, u32 rb, bool rc) { auto rs_i64 = GetGpr(rs); auto rb_i64 = GetGpr(rb); auto res_i64 = m_ir_builder->CreateOr(rs_i64, rb_i64); @@ -2836,7 +2810,7 @@ void PPULLVMRecompiler::OR(u32 ra, u32 rs, u32 rb, bool rc) { //InterpreterCall("OR", &PPUInterpreter::OR, ra, rs, rb, rc); } -void PPULLVMRecompiler::DIVDU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { +void Compiler::DIVDU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { auto ra_i64 = GetGpr(ra); auto rb_i64 = GetGpr(rb); auto res_i64 = m_ir_builder->CreateUDiv(ra_i64, rb_i64); @@ -2851,7 +2825,7 @@ void PPULLVMRecompiler::DIVDU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { //InterpreterCall("DIVDU", &PPUInterpreter::DIVDU, rd, ra, rb, oe, rc); } -void PPULLVMRecompiler::DIVWU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { +void Compiler::DIVWU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { auto ra_i32 = GetGpr(ra, 32); auto rb_i32 = GetGpr(rb, 32); auto res_i32 = m_ir_builder->CreateUDiv(ra_i32, rb_i32); @@ -2867,7 +2841,7 @@ void PPULLVMRecompiler::DIVWU(u32 rd, u32 ra, u32 rb, u32 oe, bool 
rc) { //InterpreterCall("DIVWU", &PPUInterpreter::DIVWU, rd, ra, rb, oe, rc); } -void PPULLVMRecompiler::MTSPR(u32 spr, u32 rs) { +void Compiler::MTSPR(u32 spr, u32 rs) { auto rs_i64 = GetGpr(rs); auto n = (spr >> 5) | ((spr & 0x1f) << 5); @@ -2892,16 +2866,16 @@ void PPULLVMRecompiler::MTSPR(u32 spr, u32 rs) { //InterpreterCall("MTSPR", &PPUInterpreter::MTSPR, spr, rs); } -void PPULLVMRecompiler::NAND(u32 ra, u32 rs, u32 rb, bool rc) { +void Compiler::NAND(u32 ra, u32 rs, u32 rb, bool rc) { InterpreterCall("NAND", &PPUInterpreter::NAND, ra, rs, rb, rc); } -void PPULLVMRecompiler::STVXL(u32 vs, u32 ra, u32 rb) { +void Compiler::STVXL(u32 vs, u32 ra, u32 rb) { STVX(vs, ra, rb); //InterpreterCall("STVXL", &PPUInterpreter::STVXL, vs, ra, rb); } -void PPULLVMRecompiler::DIVD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { +void Compiler::DIVD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { auto ra_i64 = GetGpr(ra); auto rb_i64 = GetGpr(rb); auto res_i64 = m_ir_builder->CreateSDiv(ra_i64, rb_i64); @@ -2916,7 +2890,7 @@ void PPULLVMRecompiler::DIVD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { //InterpreterCall("DIVD", &PPUInterpreter::DIVD, rd, ra, rb, oe, rc); } -void PPULLVMRecompiler::DIVW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { +void Compiler::DIVW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { auto ra_i32 = GetGpr(ra, 32); auto rb_i32 = GetGpr(rb, 32); auto res_i32 = m_ir_builder->CreateSDiv(ra_i32, rb_i32); @@ -2932,7 +2906,7 @@ void PPULLVMRecompiler::DIVW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { //InterpreterCall("DIVW", &PPUInterpreter::DIVW, rd, ra, rb, oe, rc); } -void PPULLVMRecompiler::LVLX(u32 vd, u32 ra, u32 rb) { +void Compiler::LVLX(u32 vd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2949,7 +2923,7 @@ void PPULLVMRecompiler::LVLX(u32 vd, u32 ra, u32 rb) { //InterpreterCall("LVLX", &PPUInterpreter::LVLX, vd, ra, rb); } -void PPULLVMRecompiler::LDBRX(u32 rd, u32 ra, u32 rb) { +void Compiler::LDBRX(u32 rd, u32 ra, u32 rb) { 
auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2961,11 +2935,11 @@ void PPULLVMRecompiler::LDBRX(u32 rd, u32 ra, u32 rb) { //InterpreterCall("LDBRX", &PPUInterpreter::LDBRX, rd, ra, rb); } -void PPULLVMRecompiler::LSWX(u32 rd, u32 ra, u32 rb) { +void Compiler::LSWX(u32 rd, u32 ra, u32 rb) { InterpreterCall("LSWX", &PPUInterpreter::LSWX, rd, ra, rb); } -void PPULLVMRecompiler::LWBRX(u32 rd, u32 ra, u32 rb) { +void Compiler::LWBRX(u32 rd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2978,7 +2952,7 @@ void PPULLVMRecompiler::LWBRX(u32 rd, u32 ra, u32 rb) { //InterpreterCall("LWBRX", &PPUInterpreter::LWBRX, rd, ra, rb); } -void PPULLVMRecompiler::LFSX(u32 frd, u32 ra, u32 rb) { +void Compiler::LFSX(u32 frd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -2990,7 +2964,7 @@ void PPULLVMRecompiler::LFSX(u32 frd, u32 ra, u32 rb) { //InterpreterCall("LFSX", &PPUInterpreter::LFSX, frd, ra, rb); } -void PPULLVMRecompiler::SRW(u32 ra, u32 rs, u32 rb, bool rc) { +void Compiler::SRW(u32 ra, u32 rs, u32 rb, bool rc) { auto rs_i32 = GetGpr(rs, 32); auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); auto rb_i8 = GetGpr(rb, 8); @@ -3006,7 +2980,7 @@ void PPULLVMRecompiler::SRW(u32 ra, u32 rs, u32 rb, bool rc) { //InterpreterCall("SRW", &PPUInterpreter::SRW, ra, rs, rb, rc); } -void PPULLVMRecompiler::SRD(u32 ra, u32 rs, u32 rb, bool rc) { +void Compiler::SRD(u32 ra, u32 rs, u32 rb, bool rc) { auto rs_i64 = GetGpr(rs); auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128)); auto rb_i8 = GetGpr(rb, 8); @@ -3023,7 +2997,7 @@ void PPULLVMRecompiler::SRD(u32 ra, u32 rs, u32 rb, bool rc) { //InterpreterCall("SRD", &PPUInterpreter::SRD, ra, rs, rb, rc); } -void PPULLVMRecompiler::LVRX(u32 vd, u32 ra, u32 rb) { +void Compiler::LVRX(u32 vd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3045,7 +3019,7 @@ 
void PPULLVMRecompiler::LVRX(u32 vd, u32 ra, u32 rb) { //InterpreterCall("LVRX", &PPUInterpreter::LVRX, vd, ra, rb); } -void PPULLVMRecompiler::LSWI(u32 rd, u32 ra, u32 nb) { +void Compiler::LSWI(u32 rd, u32 ra, u32 nb) { auto addr_i64 = ra ? GetGpr(ra) : m_ir_builder->getInt64(0); nb = nb ? nb : 32; @@ -3067,7 +3041,7 @@ void PPULLVMRecompiler::LSWI(u32 rd, u32 ra, u32 nb) { //InterpreterCall("LSWI", &PPUInterpreter::LSWI, rd, ra, nb); } -void PPULLVMRecompiler::LFSUX(u32 frd, u32 ra, u32 rb) { +void Compiler::LFSUX(u32 frd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -3077,12 +3051,12 @@ void PPULLVMRecompiler::LFSUX(u32 frd, u32 ra, u32 rb) { //InterpreterCall("LFSUX", &PPUInterpreter::LFSUX, frd, ra, rb); } -void PPULLVMRecompiler::SYNC(u32 l) { +void Compiler::SYNC(u32 l) { m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence)); //InterpreterCall("SYNC", &PPUInterpreter::SYNC, l); } -void PPULLVMRecompiler::LFDX(u32 frd, u32 ra, u32 rb) { +void Compiler::LFDX(u32 frd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3094,7 +3068,7 @@ void PPULLVMRecompiler::LFDX(u32 frd, u32 ra, u32 rb) { //InterpreterCall("LFDX", &PPUInterpreter::LFDX, frd, ra, rb); } -void PPULLVMRecompiler::LFDUX(u32 frd, u32 ra, u32 rb) { +void Compiler::LFDUX(u32 frd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -3104,15 +3078,15 @@ void PPULLVMRecompiler::LFDUX(u32 frd, u32 ra, u32 rb) { //InterpreterCall("LFDUX", &PPUInterpreter::LFDUX, frd, ra, rb); } -void PPULLVMRecompiler::STVLX(u32 vs, u32 ra, u32 rb) { +void Compiler::STVLX(u32 vs, u32 ra, u32 rb) { InterpreterCall("STVLX", &PPUInterpreter::STVLX, vs, ra, rb); } -void PPULLVMRecompiler::STSWX(u32 rs, u32 ra, u32 rb) { +void Compiler::STSWX(u32 rs, u32 ra, u32 rb) { InterpreterCall("STSWX", 
&PPUInterpreter::STSWX, rs, ra, rb); } -void PPULLVMRecompiler::STWBRX(u32 rs, u32 ra, u32 rb) { +void Compiler::STWBRX(u32 rs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3123,7 +3097,7 @@ void PPULLVMRecompiler::STWBRX(u32 rs, u32 ra, u32 rb) { //InterpreterCall("STWBRX", &PPUInterpreter::STWBRX, rs, ra, rb); } -void PPULLVMRecompiler::STFSX(u32 frs, u32 ra, u32 rb) { +void Compiler::STFSX(u32 frs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3135,11 +3109,11 @@ void PPULLVMRecompiler::STFSX(u32 frs, u32 ra, u32 rb) { //InterpreterCall("STFSX", &PPUInterpreter::STFSX, frs, ra, rb); } -void PPULLVMRecompiler::STVRX(u32 vs, u32 ra, u32 rb) { +void Compiler::STVRX(u32 vs, u32 ra, u32 rb) { InterpreterCall("STVRX", &PPUInterpreter::STVRX, vs, ra, rb); } -void PPULLVMRecompiler::STFSUX(u32 frs, u32 ra, u32 rb) { +void Compiler::STFSUX(u32 frs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -3150,7 +3124,7 @@ void PPULLVMRecompiler::STFSUX(u32 frs, u32 ra, u32 rb) { //InterpreterCall("STFSUX", &PPUInterpreter::STFSUX, frs, ra, rb); } -void PPULLVMRecompiler::STSWI(u32 rd, u32 ra, u32 nb) { +void Compiler::STSWI(u32 rd, u32 ra, u32 nb) { auto addr_i64 = ra ? GetGpr(ra) : m_ir_builder->getInt64(0); nb = nb ? 
nb : 32; @@ -3185,7 +3159,7 @@ void PPULLVMRecompiler::STSWI(u32 rd, u32 ra, u32 nb) { //InterpreterCall("STSWI", &PPUInterpreter::STSWI, rd, ra, nb); } -void PPULLVMRecompiler::STFDX(u32 frs, u32 ra, u32 rb) { +void Compiler::STFDX(u32 frs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3197,7 +3171,7 @@ void PPULLVMRecompiler::STFDX(u32 frs, u32 ra, u32 rb) { //InterpreterCall("STFDX", &PPUInterpreter::STFDX, frs, ra, rb); } -void PPULLVMRecompiler::STFDUX(u32 frs, u32 ra, u32 rb) { +void Compiler::STFDUX(u32 frs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -3208,12 +3182,12 @@ void PPULLVMRecompiler::STFDUX(u32 frs, u32 ra, u32 rb) { //InterpreterCall("STFDUX", &PPUInterpreter::STFDUX, frs, ra, rb); } -void PPULLVMRecompiler::LVLXL(u32 vd, u32 ra, u32 rb) { +void Compiler::LVLXL(u32 vd, u32 ra, u32 rb) { LVLX(vd, ra, rb); //InterpreterCall("LVLXL", &PPUInterpreter::LVLXL, vd, ra, rb); } -void PPULLVMRecompiler::LHBRX(u32 rd, u32 ra, u32 rb) { +void Compiler::LHBRX(u32 rd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3226,7 +3200,7 @@ void PPULLVMRecompiler::LHBRX(u32 rd, u32 ra, u32 rb) { //InterpreterCall("LHBRX", &PPUInterpreter::LHBRX, rd, ra, rb); } -void PPULLVMRecompiler::SRAW(u32 ra, u32 rs, u32 rb, bool rc) { +void Compiler::SRAW(u32 ra, u32 rs, u32 rb, bool rc) { auto rs_i32 = GetGpr(rs, 32); auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); rs_i64 = m_ir_builder->CreateShl(rs_i64, 32); @@ -3250,7 +3224,7 @@ void PPULLVMRecompiler::SRAW(u32 ra, u32 rs, u32 rb, bool rc) { //InterpreterCall("SRAW", &PPUInterpreter::SRAW, ra, rs, rb, rc); } -void PPULLVMRecompiler::SRAD(u32 ra, u32 rs, u32 rb, bool rc) { +void Compiler::SRAD(u32 ra, u32 rs, u32 rb, bool rc) { auto rs_i64 = GetGpr(rs); auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128)); 
rs_i128 = m_ir_builder->CreateShl(rs_i128, 64); @@ -3275,16 +3249,16 @@ void PPULLVMRecompiler::SRAD(u32 ra, u32 rs, u32 rb, bool rc) { //InterpreterCall("SRAD", &PPUInterpreter::SRAD, ra, rs, rb, rc); } -void PPULLVMRecompiler::LVRXL(u32 vd, u32 ra, u32 rb) { +void Compiler::LVRXL(u32 vd, u32 ra, u32 rb) { LVRX(vd, ra, rb); //InterpreterCall("LVRXL", &PPUInterpreter::LVRXL, vd, ra, rb); } -void PPULLVMRecompiler::DSS(u32 strm, u32 a) { +void Compiler::DSS(u32 strm, u32 a) { InterpreterCall("DSS", &PPUInterpreter::DSS, strm, a); } -void PPULLVMRecompiler::SRAWI(u32 ra, u32 rs, u32 sh, bool rc) { +void Compiler::SRAWI(u32 ra, u32 rs, u32 sh, bool rc) { auto rs_i32 = GetGpr(rs, 32); auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); rs_i64 = m_ir_builder->CreateShl(rs_i64, 32); @@ -3305,7 +3279,7 @@ void PPULLVMRecompiler::SRAWI(u32 ra, u32 rs, u32 sh, bool rc) { //InterpreterCall("SRAWI", &PPUInterpreter::SRAWI, ra, rs, sh, rc); } -void PPULLVMRecompiler::SRADI1(u32 ra, u32 rs, u32 sh, bool rc) { +void Compiler::SRADI1(u32 ra, u32 rs, u32 sh, bool rc) { auto rs_i64 = GetGpr(rs); auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128)); rs_i128 = m_ir_builder->CreateShl(rs_i128, 64); @@ -3327,22 +3301,22 @@ void PPULLVMRecompiler::SRADI1(u32 ra, u32 rs, u32 sh, bool rc) { //InterpreterCall("SRADI1", &PPUInterpreter::SRADI1, ra, rs, sh, rc); } -void PPULLVMRecompiler::SRADI2(u32 ra, u32 rs, u32 sh, bool rc) { +void Compiler::SRADI2(u32 ra, u32 rs, u32 sh, bool rc) { SRADI1(ra, rs, sh, rc); //InterpreterCall("SRADI2", &PPUInterpreter::SRADI2, ra, rs, sh, rc); } -void PPULLVMRecompiler::EIEIO() { +void Compiler::EIEIO() { m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence)); //InterpreterCall("EIEIO", &PPUInterpreter::EIEIO); } -void PPULLVMRecompiler::STVLXL(u32 vs, u32 ra, u32 rb) { +void Compiler::STVLXL(u32 vs, u32 ra, u32 rb) { STVLX(vs, ra, rb); //InterpreterCall("STVLXL", 
&PPUInterpreter::STVLXL, vs, ra, rb); } -void PPULLVMRecompiler::STHBRX(u32 rs, u32 ra, u32 rb) { +void Compiler::STHBRX(u32 rs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3353,7 +3327,7 @@ void PPULLVMRecompiler::STHBRX(u32 rs, u32 ra, u32 rb) { //InterpreterCall("STHBRX", &PPUInterpreter::STHBRX, rs, ra, rb); } -void PPULLVMRecompiler::EXTSH(u32 ra, u32 rs, bool rc) { +void Compiler::EXTSH(u32 ra, u32 rs, bool rc) { auto rs_i16 = GetGpr(rs, 16); auto rs_i64 = m_ir_builder->CreateSExt(rs_i16, m_ir_builder->getInt64Ty()); SetGpr(ra, rs_i64); @@ -3364,12 +3338,12 @@ void PPULLVMRecompiler::EXTSH(u32 ra, u32 rs, bool rc) { //InterpreterCall("EXTSH", &PPUInterpreter::EXTSH, ra, rs, rc); } -void PPULLVMRecompiler::STVRXL(u32 vs, u32 ra, u32 rb) { +void Compiler::STVRXL(u32 vs, u32 ra, u32 rb) { STVRX(vs, ra, rb); //InterpreterCall("STVRXL", &PPUInterpreter::STVRXL, vs, ra, rb); } -void PPULLVMRecompiler::EXTSB(u32 ra, u32 rs, bool rc) { +void Compiler::EXTSB(u32 ra, u32 rs, bool rc) { auto rs_i8 = GetGpr(rs, 8); auto rs_i64 = m_ir_builder->CreateSExt(rs_i8, m_ir_builder->getInt64Ty()); SetGpr(ra, rs_i64); @@ -3380,7 +3354,7 @@ void PPULLVMRecompiler::EXTSB(u32 ra, u32 rs, bool rc) { //InterpreterCall("EXTSB", &PPUInterpreter::EXTSB, ra, rs, rc); } -void PPULLVMRecompiler::STFIWX(u32 frs, u32 ra, u32 rb) { +void Compiler::STFIWX(u32 frs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3393,7 +3367,7 @@ void PPULLVMRecompiler::STFIWX(u32 frs, u32 ra, u32 rb) { //InterpreterCall("STFIWX", &PPUInterpreter::STFIWX, frs, ra, rb); } -void PPULLVMRecompiler::EXTSW(u32 ra, u32 rs, bool rc) { +void Compiler::EXTSW(u32 ra, u32 rs, bool rc) { auto rs_i32 = GetGpr(rs, 32); auto rs_i64 = m_ir_builder->CreateSExt(rs_i32, m_ir_builder->getInt64Ty()); SetGpr(ra, rs_i64); @@ -3404,11 +3378,11 @@ void PPULLVMRecompiler::EXTSW(u32 ra, u32 rs, bool rc) { //InterpreterCall("EXTSW", &PPUInterpreter::EXTSW, ra, 
rs, rc); } -void PPULLVMRecompiler::ICBI(u32 ra, u32 rs) { +void Compiler::ICBI(u32 ra, u32 rs) { InterpreterCall("ICBI", &PPUInterpreter::ICBI, ra, rs); } -void PPULLVMRecompiler::DCBZ(u32 ra, u32 rb) { +void Compiler::DCBZ(u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3425,7 +3399,7 @@ void PPULLVMRecompiler::DCBZ(u32 ra, u32 rb) { //InterpreterCall("DCBZ", &PPUInterpreter::DCBZ, ra, rb);L } -void PPULLVMRecompiler::LWZ(u32 rd, u32 ra, s32 d) { +void Compiler::LWZ(u32 rd, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3438,7 +3412,7 @@ void PPULLVMRecompiler::LWZ(u32 rd, u32 ra, s32 d) { //InterpreterCall("LWZ", &PPUInterpreter::LWZ, rd, ra, d); } -void PPULLVMRecompiler::LWZU(u32 rd, u32 ra, s32 d) { +void Compiler::LWZU(u32 rd, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -3450,7 +3424,7 @@ void PPULLVMRecompiler::LWZU(u32 rd, u32 ra, s32 d) { //InterpreterCall("LWZU", &PPUInterpreter::LWZU, rd, ra, d); } -void PPULLVMRecompiler::LBZ(u32 rd, u32 ra, s32 d) { +void Compiler::LBZ(u32 rd, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3463,7 +3437,7 @@ void PPULLVMRecompiler::LBZ(u32 rd, u32 ra, s32 d) { //InterpreterCall("LBZ", &PPUInterpreter::LBZ, rd, ra, d); } -void PPULLVMRecompiler::LBZU(u32 rd, u32 ra, s32 d) { +void Compiler::LBZU(u32 rd, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -3475,7 +3449,7 @@ void PPULLVMRecompiler::LBZU(u32 rd, u32 ra, s32 d) { //InterpreterCall("LBZU", &PPUInterpreter::LBZU, rd, ra, d); } -void PPULLVMRecompiler::STW(u32 rs, u32 ra, s32 d) { +void Compiler::STW(u32 rs, u32 ra, s32 d) { auto addr_i64 = (Value 
*)m_ir_builder->getInt64((s64)d); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3486,7 +3460,7 @@ void PPULLVMRecompiler::STW(u32 rs, u32 ra, s32 d) { //InterpreterCall("STW", &PPUInterpreter::STW, rs, ra, d); } -void PPULLVMRecompiler::STWU(u32 rs, u32 ra, s32 d) { +void Compiler::STWU(u32 rs, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -3496,7 +3470,7 @@ void PPULLVMRecompiler::STWU(u32 rs, u32 ra, s32 d) { //InterpreterCall("STWU", &PPUInterpreter::STWU, rs, ra, d); } -void PPULLVMRecompiler::STB(u32 rs, u32 ra, s32 d) { +void Compiler::STB(u32 rs, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3507,7 +3481,7 @@ void PPULLVMRecompiler::STB(u32 rs, u32 ra, s32 d) { //InterpreterCall("STB", &PPUInterpreter::STB, rs, ra, d); } -void PPULLVMRecompiler::STBU(u32 rs, u32 ra, s32 d) { +void Compiler::STBU(u32 rs, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -3517,7 +3491,7 @@ void PPULLVMRecompiler::STBU(u32 rs, u32 ra, s32 d) { //InterpreterCall("STBU", &PPUInterpreter::STBU, rs, ra, d); } -void PPULLVMRecompiler::LHZ(u32 rd, u32 ra, s32 d) { +void Compiler::LHZ(u32 rd, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3530,7 +3504,7 @@ void PPULLVMRecompiler::LHZ(u32 rd, u32 ra, s32 d) { //InterpreterCall("LHZ", &PPUInterpreter::LHZ, rd, ra, d); } -void PPULLVMRecompiler::LHZU(u32 rd, u32 ra, s32 d) { +void Compiler::LHZU(u32 rd, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -3542,7 +3516,7 @@ void PPULLVMRecompiler::LHZU(u32 rd, u32 ra, s32 d) { //InterpreterCall("LHZU", &PPUInterpreter::LHZU, rd, ra, d); } -void 
PPULLVMRecompiler::LHA(u32 rd, u32 ra, s32 d) { +void Compiler::LHA(u32 rd, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3555,7 +3529,7 @@ void PPULLVMRecompiler::LHA(u32 rd, u32 ra, s32 d) { //InterpreterCall("LHA", &PPUInterpreter::LHA, rd, ra, d); } -void PPULLVMRecompiler::LHAU(u32 rd, u32 ra, s32 d) { +void Compiler::LHAU(u32 rd, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -3567,7 +3541,7 @@ void PPULLVMRecompiler::LHAU(u32 rd, u32 ra, s32 d) { //InterpreterCall("LHAU", &PPUInterpreter::LHAU, rd, ra, d); } -void PPULLVMRecompiler::STH(u32 rs, u32 ra, s32 d) { +void Compiler::STH(u32 rs, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3578,7 +3552,7 @@ void PPULLVMRecompiler::STH(u32 rs, u32 ra, s32 d) { //InterpreterCall("STH", &PPUInterpreter::STH, rs, ra, d); } -void PPULLVMRecompiler::STHU(u32 rs, u32 ra, s32 d) { +void Compiler::STHU(u32 rs, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -3588,7 +3562,7 @@ void PPULLVMRecompiler::STHU(u32 rs, u32 ra, s32 d) { //InterpreterCall("STHU", &PPUInterpreter::STHU, rs, ra, d); } -void PPULLVMRecompiler::LMW(u32 rd, u32 ra, s32 d) { +void Compiler::LMW(u32 rd, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); if (ra) { addr_i64 = m_ir_builder->CreateAdd(addr_i64, GetGpr(ra)); @@ -3604,7 +3578,7 @@ void PPULLVMRecompiler::LMW(u32 rd, u32 ra, s32 d) { //InterpreterCall("LMW", &PPUInterpreter::LMW, rd, ra, d); } -void PPULLVMRecompiler::STMW(u32 rs, u32 ra, s32 d) { +void Compiler::STMW(u32 rs, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); if (ra) { addr_i64 = m_ir_builder->CreateAdd(addr_i64, GetGpr(ra)); @@ 
-3619,7 +3593,7 @@ void PPULLVMRecompiler::STMW(u32 rs, u32 ra, s32 d) { //InterpreterCall("STMW", &PPUInterpreter::STMW, rs, ra, d); } -void PPULLVMRecompiler::LFS(u32 frd, u32 ra, s32 d) { +void Compiler::LFS(u32 frd, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3631,7 +3605,7 @@ void PPULLVMRecompiler::LFS(u32 frd, u32 ra, s32 d) { //InterpreterCall("LFS", &PPUInterpreter::LFS, frd, ra, d); } -void PPULLVMRecompiler::LFSU(u32 frd, u32 ra, s32 ds) { +void Compiler::LFSU(u32 frd, u32 ra, s32 ds) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -3641,7 +3615,7 @@ void PPULLVMRecompiler::LFSU(u32 frd, u32 ra, s32 ds) { //InterpreterCall("LFSU", &PPUInterpreter::LFSU, frd, ra, ds); } -void PPULLVMRecompiler::LFD(u32 frd, u32 ra, s32 d) { +void Compiler::LFD(u32 frd, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3653,7 +3627,7 @@ void PPULLVMRecompiler::LFD(u32 frd, u32 ra, s32 d) { //InterpreterCall("LFD", &PPUInterpreter::LFD, frd, ra, d); } -void PPULLVMRecompiler::LFDU(u32 frd, u32 ra, s32 ds) { +void Compiler::LFDU(u32 frd, u32 ra, s32 ds) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -3664,7 +3638,7 @@ void PPULLVMRecompiler::LFDU(u32 frd, u32 ra, s32 ds) { //InterpreterCall("LFDU", &PPUInterpreter::LFDU, frd, ra, ds); } -void PPULLVMRecompiler::STFS(u32 frs, u32 ra, s32 d) { +void Compiler::STFS(u32 frs, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3676,7 +3650,7 @@ void PPULLVMRecompiler::STFS(u32 frs, u32 ra, s32 d) { //InterpreterCall("STFS", &PPUInterpreter::STFS, frs, ra, d); } -void PPULLVMRecompiler::STFSU(u32 frs, u32 ra, s32 d) { +void Compiler::STFSU(u32 frs, 
u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -3687,7 +3661,7 @@ void PPULLVMRecompiler::STFSU(u32 frs, u32 ra, s32 d) { //InterpreterCall("STFSU", &PPUInterpreter::STFSU, frs, ra, d); } -void PPULLVMRecompiler::STFD(u32 frs, u32 ra, s32 d) { +void Compiler::STFD(u32 frs, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3699,7 +3673,7 @@ void PPULLVMRecompiler::STFD(u32 frs, u32 ra, s32 d) { //InterpreterCall("STFD", &PPUInterpreter::STFD, frs, ra, d); } -void PPULLVMRecompiler::STFDU(u32 frs, u32 ra, s32 d) { +void Compiler::STFDU(u32 frs, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -3710,7 +3684,7 @@ void PPULLVMRecompiler::STFDU(u32 frs, u32 ra, s32 d) { //InterpreterCall("STFDU", &PPUInterpreter::STFDU, frs, ra, d); } -void PPULLVMRecompiler::LD(u32 rd, u32 ra, s32 ds) { +void Compiler::LD(u32 rd, u32 ra, s32 ds) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3722,7 +3696,7 @@ void PPULLVMRecompiler::LD(u32 rd, u32 ra, s32 ds) { //InterpreterCall("LD", &PPUInterpreter::LD, rd, ra, ds); } -void PPULLVMRecompiler::LDU(u32 rd, u32 ra, s32 ds) { +void Compiler::LDU(u32 rd, u32 ra, s32 ds) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -3733,7 +3707,7 @@ void PPULLVMRecompiler::LDU(u32 rd, u32 ra, s32 ds) { //InterpreterCall("LDU", &PPUInterpreter::LDU, rd, ra, ds); } -void PPULLVMRecompiler::LWA(u32 rd, u32 ra, s32 ds) { +void Compiler::LWA(u32 rd, u32 ra, s32 ds) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3746,7 +3720,7 @@ void PPULLVMRecompiler::LWA(u32 rd, u32 ra, s32 ds) { 
//InterpreterCall("LWA", &PPUInterpreter::LWA, rd, ra, ds); } -void PPULLVMRecompiler::FDIVS(u32 frd, u32 fra, u32 frb, bool rc) { +void Compiler::FDIVS(u32 frd, u32 fra, u32 frb, bool rc) { auto ra_f64 = GetFpr(fra); auto rb_f64 = GetFpr(frb); auto res_f64 = m_ir_builder->CreateFDiv(ra_f64, rb_f64); @@ -3758,7 +3732,7 @@ void PPULLVMRecompiler::FDIVS(u32 frd, u32 fra, u32 frb, bool rc) { //InterpreterCall("FDIVS", &PPUInterpreter::FDIVS, frd, fra, frb, rc); } -void PPULLVMRecompiler::FSUBS(u32 frd, u32 fra, u32 frb, bool rc) { +void Compiler::FSUBS(u32 frd, u32 fra, u32 frb, bool rc) { auto ra_f64 = GetFpr(fra); auto rb_f64 = GetFpr(frb); auto res_f64 = m_ir_builder->CreateFSub(ra_f64, rb_f64); @@ -3770,7 +3744,7 @@ void PPULLVMRecompiler::FSUBS(u32 frd, u32 fra, u32 frb, bool rc) { //InterpreterCall("FSUBS", &PPUInterpreter::FSUBS, frd, fra, frb, rc); } -void PPULLVMRecompiler::FADDS(u32 frd, u32 fra, u32 frb, bool rc) { +void Compiler::FADDS(u32 frd, u32 fra, u32 frb, bool rc) { auto ra_f64 = GetFpr(fra); auto rb_f64 = GetFpr(frb); auto res_f64 = m_ir_builder->CreateFAdd(ra_f64, rb_f64); @@ -3782,7 +3756,7 @@ void PPULLVMRecompiler::FADDS(u32 frd, u32 fra, u32 frb, bool rc) { //InterpreterCall("FADDS", &PPUInterpreter::FADDS, frd, fra, frb, rc); } -void PPULLVMRecompiler::FSQRTS(u32 frd, u32 frb, bool rc) { +void Compiler::FSQRTS(u32 frd, u32 frb, bool rc) { auto rb_f64 = GetFpr(frb); auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64); auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); @@ -3793,11 +3767,11 @@ void PPULLVMRecompiler::FSQRTS(u32 frd, u32 frb, bool rc) { //InterpreterCall("FSQRTS", &PPUInterpreter::FSQRTS, frd, frb, rc); } -void PPULLVMRecompiler::FRES(u32 frd, u32 frb, bool rc) { +void Compiler::FRES(u32 frd, u32 frb, bool rc) { InterpreterCall("FRES", &PPUInterpreter::FRES, frd, frb, rc); } -void PPULLVMRecompiler::FMULS(u32 frd, 
u32 fra, u32 frc, bool rc) { +void Compiler::FMULS(u32 frd, u32 fra, u32 frc, bool rc) { auto ra_f64 = GetFpr(fra); auto rc_f64 = GetFpr(frc); auto res_f64 = m_ir_builder->CreateFMul(ra_f64, rc_f64); @@ -3809,7 +3783,7 @@ void PPULLVMRecompiler::FMULS(u32 frd, u32 fra, u32 frc, bool rc) { //InterpreterCall("FMULS", &PPUInterpreter::FMULS, frd, fra, frc, rc); } -void PPULLVMRecompiler::FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { +void Compiler::FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { auto ra_f64 = GetFpr(fra); auto rb_f64 = GetFpr(frb); auto rc_f64 = GetFpr(frc); @@ -3822,7 +3796,7 @@ void PPULLVMRecompiler::FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { //InterpreterCall("FMADDS", &PPUInterpreter::FMADDS, frd, fra, frc, frb, rc); } -void PPULLVMRecompiler::FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { +void Compiler::FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { auto ra_f64 = GetFpr(fra); auto rb_f64 = GetFpr(frb); auto rc_f64 = GetFpr(frc); @@ -3836,7 +3810,7 @@ void PPULLVMRecompiler::FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { //InterpreterCall("FMSUBS", &PPUInterpreter::FMSUBS, frd, fra, frc, frb, rc); } -void PPULLVMRecompiler::FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { +void Compiler::FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { auto ra_f64 = GetFpr(fra); auto rb_f64 = GetFpr(frb); auto rc_f64 = GetFpr(frc); @@ -3851,7 +3825,7 @@ void PPULLVMRecompiler::FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { //InterpreterCall("FNMSUBS", &PPUInterpreter::FNMSUBS, frd, fra, frc, frb, rc); } -void PPULLVMRecompiler::FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { +void Compiler::FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { auto ra_f64 = GetFpr(fra); auto rb_f64 = GetFpr(frb); auto rc_f64 = GetFpr(frc); @@ -3865,7 +3839,7 @@ void PPULLVMRecompiler::FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { //InterpreterCall("FNMADDS", &PPUInterpreter::FNMADDS, frd, fra, 
frc, frb, rc); } -void PPULLVMRecompiler::STD(u32 rs, u32 ra, s32 d) { +void Compiler::STD(u32 rs, u32 ra, s32 d) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); if (ra) { auto ra_i64 = GetGpr(ra); @@ -3876,7 +3850,7 @@ void PPULLVMRecompiler::STD(u32 rs, u32 ra, s32 d) { //InterpreterCall("STD", &PPUInterpreter::STD, rs, ra, d); } -void PPULLVMRecompiler::STDU(u32 rs, u32 ra, s32 ds) { +void Compiler::STDU(u32 rs, u32 ra, s32 ds) { auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); @@ -3886,47 +3860,47 @@ void PPULLVMRecompiler::STDU(u32 rs, u32 ra, s32 ds) { //InterpreterCall("STDU", &PPUInterpreter::STDU, rs, ra, ds); } -void PPULLVMRecompiler::MTFSB1(u32 crbd, bool rc) { +void Compiler::MTFSB1(u32 crbd, bool rc) { InterpreterCall("MTFSB1", &PPUInterpreter::MTFSB1, crbd, rc); } -void PPULLVMRecompiler::MCRFS(u32 crbd, u32 crbs) { +void Compiler::MCRFS(u32 crbd, u32 crbs) { InterpreterCall("MCRFS", &PPUInterpreter::MCRFS, crbd, crbs); } -void PPULLVMRecompiler::MTFSB0(u32 crbd, bool rc) { +void Compiler::MTFSB0(u32 crbd, bool rc) { InterpreterCall("MTFSB0", &PPUInterpreter::MTFSB0, crbd, rc); } -void PPULLVMRecompiler::MTFSFI(u32 crfd, u32 i, bool rc) { +void Compiler::MTFSFI(u32 crfd, u32 i, bool rc) { InterpreterCall("MTFSFI", &PPUInterpreter::MTFSFI, crfd, i, rc); } -void PPULLVMRecompiler::MFFS(u32 frd, bool rc) { +void Compiler::MFFS(u32 frd, bool rc) { InterpreterCall("MFFS", &PPUInterpreter::MFFS, frd, rc); } -void PPULLVMRecompiler::MTFSF(u32 flm, u32 frb, bool rc) { +void Compiler::MTFSF(u32 flm, u32 frb, bool rc) { InterpreterCall("MTFSF", &PPUInterpreter::MTFSF, flm, frb, rc); } -void PPULLVMRecompiler::FCMPU(u32 crfd, u32 fra, u32 frb) { +void Compiler::FCMPU(u32 crfd, u32 fra, u32 frb) { InterpreterCall("FCMPU", &PPUInterpreter::FCMPU, crfd, fra, frb); } -void PPULLVMRecompiler::FRSP(u32 frd, u32 frb, bool rc) { +void Compiler::FRSP(u32 frd, u32 frb, 
bool rc) { InterpreterCall("FRSP", &PPUInterpreter::FRSP, frd, frb, rc); } -void PPULLVMRecompiler::FCTIW(u32 frd, u32 frb, bool rc) { +void Compiler::FCTIW(u32 frd, u32 frb, bool rc) { InterpreterCall("FCTIW", &PPUInterpreter::FCTIW, frd, frb, rc); } -void PPULLVMRecompiler::FCTIWZ(u32 frd, u32 frb, bool rc) { +void Compiler::FCTIWZ(u32 frd, u32 frb, bool rc) { InterpreterCall("FCTIWZ", &PPUInterpreter::FCTIWZ, frd, frb, rc); } -void PPULLVMRecompiler::FDIV(u32 frd, u32 fra, u32 frb, bool rc) { +void Compiler::FDIV(u32 frd, u32 fra, u32 frb, bool rc) { auto ra_f64 = GetFpr(fra); auto rb_f64 = GetFpr(frb); auto res_f64 = m_ir_builder->CreateFDiv(ra_f64, rb_f64); @@ -3936,7 +3910,7 @@ void PPULLVMRecompiler::FDIV(u32 frd, u32 fra, u32 frb, bool rc) { //InterpreterCall("FDIV", &PPUInterpreter::FDIV, frd, fra, frb, rc); } -void PPULLVMRecompiler::FSUB(u32 frd, u32 fra, u32 frb, bool rc) { +void Compiler::FSUB(u32 frd, u32 fra, u32 frb, bool rc) { auto ra_f64 = GetFpr(fra); auto rb_f64 = GetFpr(frb); auto res_f64 = m_ir_builder->CreateFSub(ra_f64, rb_f64); @@ -3946,7 +3920,7 @@ void PPULLVMRecompiler::FSUB(u32 frd, u32 fra, u32 frb, bool rc) { //InterpreterCall("FSUB", &PPUInterpreter::FSUB, frd, fra, frb, rc); } -void PPULLVMRecompiler::FADD(u32 frd, u32 fra, u32 frb, bool rc) { +void Compiler::FADD(u32 frd, u32 fra, u32 frb, bool rc) { auto ra_f64 = GetFpr(fra); auto rb_f64 = GetFpr(frb); auto res_f64 = m_ir_builder->CreateFAdd(ra_f64, rb_f64); @@ -3956,7 +3930,7 @@ void PPULLVMRecompiler::FADD(u32 frd, u32 fra, u32 frb, bool rc) { //InterpreterCall("FADD", &PPUInterpreter::FADD, frd, fra, frb, rc); } -void PPULLVMRecompiler::FSQRT(u32 frd, u32 frb, bool rc) { +void Compiler::FSQRT(u32 frd, u32 frb, bool rc) { auto rb_f64 = GetFpr(frb); auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64); SetFpr(frd, res_f64); @@ -3965,11 +3939,11 @@ void PPULLVMRecompiler::FSQRT(u32 frd, u32 frb, 
bool rc) { //InterpreterCall("FSQRT", &PPUInterpreter::FSQRT, frd, frb, rc); } -void PPULLVMRecompiler::FSEL(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { +void Compiler::FSEL(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { InterpreterCall("FSEL", &PPUInterpreter::FSEL, frd, fra, frc, frb, rc); } -void PPULLVMRecompiler::FMUL(u32 frd, u32 fra, u32 frc, bool rc) { +void Compiler::FMUL(u32 frd, u32 fra, u32 frc, bool rc) { auto ra_f64 = GetFpr(fra); auto rc_f64 = GetFpr(frc); auto res_f64 = m_ir_builder->CreateFMul(ra_f64, rc_f64); @@ -3979,11 +3953,11 @@ void PPULLVMRecompiler::FMUL(u32 frd, u32 fra, u32 frc, bool rc) { //InterpreterCall("FMUL", &PPUInterpreter::FMUL, frd, fra, frc, rc); } -void PPULLVMRecompiler::FRSQRTE(u32 frd, u32 frb, bool rc) { +void Compiler::FRSQRTE(u32 frd, u32 frb, bool rc) { InterpreterCall("FRSQRTE", &PPUInterpreter::FRSQRTE, frd, frb, rc); } -void PPULLVMRecompiler::FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { +void Compiler::FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { auto ra_f64 = GetFpr(fra); auto rb_f64 = GetFpr(frb); auto rc_f64 = GetFpr(frc); @@ -3995,7 +3969,7 @@ void PPULLVMRecompiler::FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { //InterpreterCall("FMSUB", &PPUInterpreter::FMSUB, frd, fra, frc, frb, rc); } -void PPULLVMRecompiler::FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { +void Compiler::FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { auto ra_f64 = GetFpr(fra); auto rb_f64 = GetFpr(frb); auto rc_f64 = GetFpr(frc); @@ -4006,7 +3980,7 @@ void PPULLVMRecompiler::FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { //InterpreterCall("FMADD", &PPUInterpreter::FMADD, frd, fra, frc, frb, rc); } -void PPULLVMRecompiler::FNMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { +void Compiler::FNMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { auto ra_f64 = GetFpr(fra); auto rb_f64 = GetFpr(frb); auto rc_f64 = GetFpr(frc); @@ -4018,7 +3992,7 @@ void PPULLVMRecompiler::FNMSUB(u32 frd, u32 fra, u32 
frc, u32 frb, bool rc) { //InterpreterCall("FNMSUB", &PPUInterpreter::FNMSUB, frd, fra, frc, frb, rc); } -void PPULLVMRecompiler::FNMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { +void Compiler::FNMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { auto ra_f64 = GetFpr(fra); auto rb_f64 = GetFpr(frb); auto rc_f64 = GetFpr(frc); @@ -4031,11 +4005,11 @@ void PPULLVMRecompiler::FNMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { //InterpreterCall("FNMADD", &PPUInterpreter::FNMADD, frd, fra, frc, frb, rc); } -void PPULLVMRecompiler::FCMPO(u32 crfd, u32 fra, u32 frb) { +void Compiler::FCMPO(u32 crfd, u32 fra, u32 frb) { InterpreterCall("FCMPO", &PPUInterpreter::FCMPO, crfd, fra, frb); } -void PPULLVMRecompiler::FNEG(u32 frd, u32 frb, bool rc) { +void Compiler::FNEG(u32 frd, u32 frb, bool rc) { auto rb_f64 = GetFpr(frb); rb_f64 = m_ir_builder->CreateFNeg(rb_f64); SetFpr(frd, rb_f64); @@ -4044,13 +4018,13 @@ void PPULLVMRecompiler::FNEG(u32 frd, u32 frb, bool rc) { //InterpreterCall("FNEG", &PPUInterpreter::FNEG, frd, frb, rc); } -void PPULLVMRecompiler::FMR(u32 frd, u32 frb, bool rc) { +void Compiler::FMR(u32 frd, u32 frb, bool rc) { SetFpr(frd, GetFpr(frb)); // TODO: Set flags //InterpreterCall("FMR", &PPUInterpreter::FMR, frd, frb, rc); } -void PPULLVMRecompiler::FNABS(u32 frd, u32 frb, bool rc) { +void Compiler::FNABS(u32 frd, u32 frb, bool rc) { auto rb_f64 = GetFpr(frb); auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::fabs, m_ir_builder->getDoubleTy()), rb_f64); res_f64 = m_ir_builder->CreateFNeg(res_f64); @@ -4060,7 +4034,7 @@ void PPULLVMRecompiler::FNABS(u32 frd, u32 frb, bool rc) { //InterpreterCall("FNABS", &PPUInterpreter::FNABS, frd, frb, rc); } -void PPULLVMRecompiler::FABS(u32 frd, u32 frb, bool rc) { +void Compiler::FABS(u32 frd, u32 frb, bool rc) { auto rb_f64 = GetFpr(frb); auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::fabs, 
m_ir_builder->getDoubleTy()), rb_f64); SetFpr(frd, res_f64); @@ -4069,15 +4043,15 @@ void PPULLVMRecompiler::FABS(u32 frd, u32 frb, bool rc) { //InterpreterCall("FABS", &PPUInterpreter::FABS, frd, frb, rc); } -void PPULLVMRecompiler::FCTID(u32 frd, u32 frb, bool rc) { +void Compiler::FCTID(u32 frd, u32 frb, bool rc) { InterpreterCall("FCTID", &PPUInterpreter::FCTID, frd, frb, rc); } -void PPULLVMRecompiler::FCTIDZ(u32 frd, u32 frb, bool rc) { +void Compiler::FCTIDZ(u32 frd, u32 frb, bool rc) { InterpreterCall("FCTIDZ", &PPUInterpreter::FCTIDZ, frd, frb, rc); } -void PPULLVMRecompiler::FCFID(u32 frd, u32 frb, bool rc) { +void Compiler::FCFID(u32 frd, u32 frb, bool rc) { auto rb_i64 = GetFpr(frb, 64, true); auto res_f64 = m_ir_builder->CreateSIToFP(rb_i64, m_ir_builder->getDoubleTy()); SetFpr(frd, res_f64); @@ -4086,12 +4060,26 @@ void PPULLVMRecompiler::FCFID(u32 frd, u32 frb, bool rc) { //InterpreterCall("FCFID", &PPUInterpreter::FCFID, frd, frb, rc); } -void PPULLVMRecompiler::UNK(const u32 code, const u32 opcode, const u32 gcode) { +void Compiler::UNK(const u32 code, const u32 opcode, const u32 gcode) { //InterpreterCall("UNK", &PPUInterpreter::UNK, code, opcode, gcode); } -BasicBlock * PPULLVMRecompiler::GetBlockInFunction(u32 address, Function * function, bool create_if_not_exist) { - auto block_name = fmt::Format("instr_0x%X", address); +std::string Compiler::GetBasicBlockNameFromAddress(u32 address) { + std::string name; + + if (address == 0) { + name = "entry"; + } else if (address == 0xFFFFFFFF) { + name = "unknown"; + } else { + name = fmt::Format("instr_0x%X", address); + } + + return name; +} + +BasicBlock * Compiler::GetBasicBlockFromAddress(u32 address, Function * function, bool create_if_not_exist) { + auto block_name = GetBasicBlockNameFromAddress(address); BasicBlock * block = nullptr; for (auto i = function->getBasicBlockList().begin(); i != function->getBasicBlockList().end(); i++) { if (i->getName() == block_name) { @@ -4107,169 +4095,24 @@ 
BasicBlock * PPULLVMRecompiler::GetBlockInFunction(u32 address, Function * funct return block; } -void PPULLVMRecompiler::Compile(u32 address) { - auto compilation_start = std::chrono::high_resolution_clock::now(); - - // Get the revision number for this section - u32 revision = 0; - auto compiled = m_compiled.lower_bound(std::make_pair(address, 0)); - if (compiled != m_compiled.end() && compiled->first.first == address) { - revision = ~(compiled->first.second); - revision++; - } - - auto ir_build_start = std::chrono::high_resolution_clock::now(); - - // Create a function for this section - auto function_name = fmt::Format("fn_0x%X_%u", address, revision); - m_current_function = (Function *)m_module->getOrInsertFunction(function_name, m_ir_builder->getVoidTy(), - m_ir_builder->getInt8PtrTy() /*ppu_state*/, - m_ir_builder->getInt8PtrTy() /*interpreter*/, nullptr); - m_current_function->setCallingConv(CallingConv::X86_64_Win64); - auto arg_i = m_current_function->arg_begin(); - arg_i->setName("ppu_state"); - (++arg_i)->setName("interpreter"); - - // Add an entry block that branches to the first instruction - m_ir_builder->SetInsertPoint(BasicBlock::Create(m_ir_builder->getContext(), "entry", m_current_function)); - m_ir_builder->CreateBr(GetBlockInFunction(address, m_current_function, true)); - - // Convert each block in this section to LLVM IR - m_num_instructions = 0; - m_current_function_uncompiled_blocks_list.clear(); - m_current_function_unhit_blocks_list.clear(); - m_current_function_uncompiled_blocks_list.push_back(address); - while (!m_current_function_uncompiled_blocks_list.empty()) { - m_current_instruction_address = m_current_function_uncompiled_blocks_list.front(); - auto block = GetBlockInFunction(m_current_instruction_address, m_current_function, true); - m_hit_branch_instruction = false; - m_ir_builder->SetInsertPoint(block); - m_current_function_uncompiled_blocks_list.pop_front(); - - while (!m_hit_branch_instruction) { - if 
(!block->getInstList().empty()) { - break; - } - - u32 instr = vm::read32(m_current_instruction_address); - Decode(instr); - m_num_instructions++; - - m_current_instruction_address += 4; - if (!m_hit_branch_instruction) { - block = GetBlockInFunction(m_current_instruction_address, m_current_function, true); - m_ir_builder->CreateBr(block); - m_ir_builder->SetInsertPoint(block); - } - } - } - - auto ir_build_end = std::chrono::high_resolution_clock::now(); - m_ir_build_time += std::chrono::duration_cast(ir_build_end - ir_build_start); - - // Optimize this function - auto optimize_start = std::chrono::high_resolution_clock::now(); - m_fpm->run(*m_current_function); - auto optimize_end = std::chrono::high_resolution_clock::now(); - m_optimizing_time += std::chrono::duration_cast(optimize_end - optimize_start); - - // Translate to machine code - auto translate_start = std::chrono::high_resolution_clock::now(); - MachineCodeInfo mci; - m_execution_engine->runJITOnFunction(m_current_function, &mci); - auto translate_end = std::chrono::high_resolution_clock::now(); - m_translation_time += std::chrono::duration_cast(translate_end - translate_start); - - // Add the executable to private and shared data stores - ExecutableInfo executable_info; - executable_info.executable = (Executable)mci.address(); - executable_info.size = mci.size(); - executable_info.num_instructions = m_num_instructions; - executable_info.unhit_blocks_list = std::move(m_current_function_unhit_blocks_list); - executable_info.llvm_function = m_current_function; - m_compiled[std::make_pair(address, ~revision)] = executable_info; - - { - std::lock_guard lock(m_compiled_shared_lock); - m_compiled_shared[std::make_pair(address, ~revision)] = std::make_pair(executable_info.executable, 0); - } - - if (revision) { - m_revision.fetch_add(1, std::memory_order_relaxed); - } - - auto compilation_end = std::chrono::high_resolution_clock::now(); - m_compilation_time += std::chrono::duration_cast(compilation_end - 
compilation_start); -} - -void PPULLVMRecompiler::RemoveUnusedOldVersions() { - u32 num_removed = 0; - u32 prev_address = 0; - for (auto i = m_compiled.begin(); i != m_compiled.end(); i++) { - u32 current_address = i->first.first; - if (prev_address == current_address) { - bool erase_this_entry = false; - - { - std::lock_guard lock(m_compiled_shared_lock); - auto j = m_compiled_shared.find(i->first); - if (j->second.second == 0) { - m_compiled_shared.erase(j); - erase_this_entry = true; - } - } - - if (erase_this_entry) { - auto tmp = i; - i--; - m_execution_engine->freeMachineCodeForFunction(tmp->second.llvm_function); - tmp->second.llvm_function->eraseFromParent(); - m_compiled.erase(tmp); - num_removed++; - } - } - - prev_address = current_address; - } - - if (num_removed > 0) { - LOG_NOTICE(PPU, "Removed %u old versions", num_removed); - } -} - -bool PPULLVMRecompiler::NeedsCompiling(u32 address) { - auto i = m_compiled.lower_bound(std::make_pair(address, 0)); - if (i != m_compiled.end() && i->first.first == address) { - if (i->second.num_instructions >= 300) { - // This section has reached its limit. Don't allow further expansion. 
- return false; - } - - // If any of the unhit blocks in this function have been hit, then recompile this section - for (auto j = i->second.unhit_blocks_list.begin(); j != i->second.unhit_blocks_list.end(); j++) { - if (m_hit_blocks.find(*j) != m_hit_blocks.end()) { - return true; - } - } - - return false; - } else { - // This section has not been encountered before - return true; - } -} - -Value * PPULLVMRecompiler::GetPPUState() { +Value * Compiler::GetPPUStateArg() { return m_current_function->arg_begin(); } -Value * PPULLVMRecompiler::GetInterpreter() { +Value * Compiler::GetInterpreterArg() { auto i = m_current_function->arg_begin(); i++; return i; } -Value * PPULLVMRecompiler::GetBit(Value * val, u32 n) { +Value * Compiler::GetTracerArg() { + auto i = m_current_function->arg_begin(); + i++; + i++; + return i; +} + +Value * Compiler::GetBit(Value * val, u32 n) { Value * bit; #ifdef PPU_LLVM_RECOMPILER_USE_BMI @@ -4291,11 +4134,11 @@ Value * PPULLVMRecompiler::GetBit(Value * val, u32 n) { return bit; } -Value * PPULLVMRecompiler::ClrBit(Value * val, u32 n) { +Value * Compiler::ClrBit(Value * val, u32 n) { return m_ir_builder->CreateAnd(val, ~((u64)1 << (val->getType()->getIntegerBitWidth() - n - 1))); } -Value * PPULLVMRecompiler::SetBit(Value * val, u32 n, Value * bit, bool doClear) { +Value * Compiler::SetBit(Value * val, u32 n, Value * bit, bool doClear) { if (doClear) { val = ClrBit(val, n); } @@ -4313,7 +4156,7 @@ Value * PPULLVMRecompiler::SetBit(Value * val, u32 n, Value * bit, bool doClear) return m_ir_builder->CreateOr(val, bit); } -Value * PPULLVMRecompiler::GetNibble(Value * val, u32 n) { +Value * Compiler::GetNibble(Value * val, u32 n) { Value * nibble; #ifdef PPU_LLVM_RECOMPILER_USE_BMI @@ -4335,11 +4178,11 @@ Value * PPULLVMRecompiler::GetNibble(Value * val, u32 n) { return nibble; } -Value * PPULLVMRecompiler::ClrNibble(Value * val, u32 n) { +Value * Compiler::ClrNibble(Value * val, u32 n) { return m_ir_builder->CreateAnd(val, ~((u64)0xF << 
((((val->getType()->getIntegerBitWidth() >> 2) - 1) - n) * 4))); } -Value * PPULLVMRecompiler::SetNibble(Value * val, u32 n, Value * nibble, bool doClear) { +Value * Compiler::SetNibble(Value * val, u32 n, Value * nibble, bool doClear) { if (doClear) { val = ClrNibble(val, n); } @@ -4357,7 +4200,7 @@ Value * PPULLVMRecompiler::SetNibble(Value * val, u32 n, Value * nibble, bool do return m_ir_builder->CreateOr(val, nibble); } -Value * PPULLVMRecompiler::SetNibble(Value * val, u32 n, Value * b0, Value * b1, Value * b2, Value * b3, bool doClear) { +Value * Compiler::SetNibble(Value * val, u32 n, Value * b0, Value * b1, Value * b2, Value * b3, bool doClear) { if (doClear) { val = ClrNibble(val, n); } @@ -4381,58 +4224,58 @@ Value * PPULLVMRecompiler::SetNibble(Value * val, u32 n, Value * b0, Value * b1, return val; } -Value * PPULLVMRecompiler::GetPc() { - auto pc_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, PC)); +Value * Compiler::GetPc() { + auto pc_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, PC)); auto pc_i32_ptr = m_ir_builder->CreateBitCast(pc_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); return m_ir_builder->CreateAlignedLoad(pc_i32_ptr, 4); } -void PPULLVMRecompiler::SetPc(Value * val_ix) { - auto pc_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, PC)); +void Compiler::SetPc(Value * val_ix) { + auto pc_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, PC)); auto pc_i32_ptr = m_ir_builder->CreateBitCast(pc_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); auto val_i32 = m_ir_builder->CreateZExtOrTrunc(val_ix, m_ir_builder->getInt32Ty()); m_ir_builder->CreateAlignedStore(val_i32, pc_i32_ptr, 4); } -Value * PPULLVMRecompiler::GetGpr(u32 r, u32 num_bits) { - auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, GPR[r])); +Value * 
Compiler::GetGpr(u32 r, u32 num_bits) { + auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, GPR[r])); auto r_ix_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getIntNTy(num_bits)->getPointerTo()); return m_ir_builder->CreateAlignedLoad(r_ix_ptr, 8); } -void PPULLVMRecompiler::SetGpr(u32 r, Value * val_x64) { - auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, GPR[r])); +void Compiler::SetGpr(u32 r, Value * val_x64) { + auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, GPR[r])); auto r_i64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); m_ir_builder->CreateAlignedStore(val_i64, r_i64_ptr, 8); } -Value * PPULLVMRecompiler::GetCr() { - auto cr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, CR)); +Value * Compiler::GetCr() { + auto cr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, CR)); auto cr_i32_ptr = m_ir_builder->CreateBitCast(cr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); return m_ir_builder->CreateAlignedLoad(cr_i32_ptr, 4); } -Value * PPULLVMRecompiler::GetCrField(u32 n) { +Value * Compiler::GetCrField(u32 n) { return GetNibble(GetCr(), n); } -void PPULLVMRecompiler::SetCr(Value * val_x32) { +void Compiler::SetCr(Value * val_x32) { auto val_i32 = m_ir_builder->CreateBitCast(val_x32, m_ir_builder->getInt32Ty()); - auto cr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, CR)); + auto cr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, CR)); auto cr_i32_ptr = m_ir_builder->CreateBitCast(cr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(val_i32, cr_i32_ptr, 4); } 
-void PPULLVMRecompiler::SetCrField(u32 n, Value * field) { +void Compiler::SetCrField(u32 n, Value * field) { SetCr(SetNibble(GetCr(), n, field)); } -void PPULLVMRecompiler::SetCrField(u32 n, Value * b0, Value * b1, Value * b2, Value * b3) { +void Compiler::SetCrField(u32 n, Value * b0, Value * b1, Value * b2, Value * b3) { SetCr(SetNibble(GetCr(), n, b0, b1, b2, b3)); } -void PPULLVMRecompiler::SetCrFieldSignedCmp(u32 n, Value * a, Value * b) { +void Compiler::SetCrFieldSignedCmp(u32 n, Value * a, Value * b) { auto lt_i1 = m_ir_builder->CreateICmpSLT(a, b); auto gt_i1 = m_ir_builder->CreateICmpSGT(a, b); auto eq_i1 = m_ir_builder->CreateICmpEQ(a, b); @@ -4441,7 +4284,7 @@ void PPULLVMRecompiler::SetCrFieldSignedCmp(u32 n, Value * a, Value * b) { SetCr(cr_i32); } -void PPULLVMRecompiler::SetCrFieldUnsignedCmp(u32 n, Value * a, Value * b) { +void Compiler::SetCrFieldUnsignedCmp(u32 n, Value * a, Value * b) { auto lt_i1 = m_ir_builder->CreateICmpULT(a, b); auto gt_i1 = m_ir_builder->CreateICmpUGT(a, b); auto eq_i1 = m_ir_builder->CreateICmpEQ(a, b); @@ -4450,7 +4293,7 @@ void PPULLVMRecompiler::SetCrFieldUnsignedCmp(u32 n, Value * a, Value * b) { SetCr(cr_i32); } -void PPULLVMRecompiler::SetCr6AfterVectorCompare(u32 vr) { +void Compiler::SetCr6AfterVectorCompare(u32 vr) { auto vr_v16i8 = GetVrAsIntVec(vr, 8); auto vr_mask_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmovmskb_128), vr_v16i8); auto cmp0_i1 = m_ir_builder->CreateICmpEQ(vr_mask_i32, m_ir_builder->getInt32(0)); @@ -4460,80 +4303,80 @@ void PPULLVMRecompiler::SetCr6AfterVectorCompare(u32 vr) { SetCr(cr_i32); } -Value * PPULLVMRecompiler::GetLr() { - auto lr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, LR)); +Value * Compiler::GetLr() { + auto lr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, LR)); auto lr_i64_ptr = m_ir_builder->CreateBitCast(lr_i8_ptr, 
m_ir_builder->getInt64Ty()->getPointerTo()); return m_ir_builder->CreateAlignedLoad(lr_i64_ptr, 8); } -void PPULLVMRecompiler::SetLr(Value * val_x64) { +void Compiler::SetLr(Value * val_x64) { auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - auto lr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, LR)); + auto lr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, LR)); auto lr_i64_ptr = m_ir_builder->CreateBitCast(lr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(val_i64, lr_i64_ptr, 8); } -Value * PPULLVMRecompiler::GetCtr() { - auto ctr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, CTR)); +Value * Compiler::GetCtr() { + auto ctr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, CTR)); auto ctr_i64_ptr = m_ir_builder->CreateBitCast(ctr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); return m_ir_builder->CreateAlignedLoad(ctr_i64_ptr, 8); } -void PPULLVMRecompiler::SetCtr(Value * val_x64) { +void Compiler::SetCtr(Value * val_x64) { auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - auto ctr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, CTR)); + auto ctr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, CTR)); auto ctr_i64_ptr = m_ir_builder->CreateBitCast(ctr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(val_i64, ctr_i64_ptr, 8); } -Value * PPULLVMRecompiler::GetXer() { - auto xer_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, XER)); +Value * Compiler::GetXer() { + auto xer_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, XER)); auto xer_i64_ptr = 
m_ir_builder->CreateBitCast(xer_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); return m_ir_builder->CreateAlignedLoad(xer_i64_ptr, 8); } -Value * PPULLVMRecompiler::GetXerCa() { +Value * Compiler::GetXerCa() { return GetBit(GetXer(), 34); } -Value * PPULLVMRecompiler::GetXerSo() { +Value * Compiler::GetXerSo() { return GetBit(GetXer(), 32); } -void PPULLVMRecompiler::SetXer(Value * val_x64) { +void Compiler::SetXer(Value * val_x64) { auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - auto xer_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, XER)); + auto xer_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, XER)); auto xer_i64_ptr = m_ir_builder->CreateBitCast(xer_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(val_i64, xer_i64_ptr, 8); } -void PPULLVMRecompiler::SetXerCa(Value * ca) { +void Compiler::SetXerCa(Value * ca) { auto xer_i64 = GetXer(); xer_i64 = SetBit(xer_i64, 34, ca); SetXer(xer_i64); } -void PPULLVMRecompiler::SetXerSo(Value * so) { +void Compiler::SetXerSo(Value * so) { auto xer_i64 = GetXer(); xer_i64 = SetBit(xer_i64, 32, so); SetXer(xer_i64); } -Value * PPULLVMRecompiler::GetUsprg0() { - auto usrpg0_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, USPRG0)); +Value * Compiler::GetUsprg0() { + auto usrpg0_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, USPRG0)); auto usprg0_i64_ptr = m_ir_builder->CreateBitCast(usrpg0_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); return m_ir_builder->CreateAlignedLoad(usprg0_i64_ptr, 8); } -void PPULLVMRecompiler::SetUsprg0(Value * val_x64) { +void Compiler::SetUsprg0(Value * val_x64) { auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - auto usprg0_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned 
int)offsetof(PPUThread, USPRG0)); + auto usprg0_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, USPRG0)); auto usprg0_i64_ptr = m_ir_builder->CreateBitCast(usprg0_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(val_i64, usprg0_i64_ptr, 8); } -Value * PPULLVMRecompiler::GetFpr(u32 r, u32 bits, bool as_int) { - auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, FPR[r])); +Value * Compiler::GetFpr(u32 r, u32 bits, bool as_int) { + auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, FPR[r])); if (!as_int) { auto r_f64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getDoubleTy()->getPointerTo()); auto r_f64 = m_ir_builder->CreateAlignedLoad(r_f64_ptr, 8); @@ -4553,8 +4396,8 @@ Value * PPULLVMRecompiler::GetFpr(u32 r, u32 bits, bool as_int) { } } -void PPULLVMRecompiler::SetFpr(u32 r, Value * val) { - auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, FPR[r])); +void Compiler::SetFpr(u32 r, Value * val) { + auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, FPR[r])); auto r_f64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getDoubleTy()->getPointerTo()); Value* val_f64; @@ -4570,54 +4413,54 @@ void PPULLVMRecompiler::SetFpr(u32 r, Value * val) { m_ir_builder->CreateAlignedStore(val_f64, r_f64_ptr, 8); } -Value * PPULLVMRecompiler::GetVscr() { - auto vscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, VSCR)); +Value * Compiler::GetVscr() { + auto vscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, VSCR)); auto vscr_i32_ptr = m_ir_builder->CreateBitCast(vscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); return m_ir_builder->CreateAlignedLoad(vscr_i32_ptr, 4); } -void 
PPULLVMRecompiler::SetVscr(Value * val_x32) { +void Compiler::SetVscr(Value * val_x32) { auto val_i32 = m_ir_builder->CreateBitCast(val_x32, m_ir_builder->getInt32Ty()); - auto vscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, VSCR)); + auto vscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, VSCR)); auto vscr_i32_ptr = m_ir_builder->CreateBitCast(vscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(val_i32, vscr_i32_ptr, 4); } -Value * PPULLVMRecompiler::GetVr(u32 vr) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, VPR[vr])); +Value * Compiler::GetVr(u32 vr) { + auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, VPR[vr])); auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); return m_ir_builder->CreateAlignedLoad(vr_i128_ptr, 16); } -Value * PPULLVMRecompiler::GetVrAsIntVec(u32 vr, u32 vec_elt_num_bits) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, VPR[vr])); +Value * Compiler::GetVrAsIntVec(u32 vr, u32 vec_elt_num_bits) { + auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, VPR[vr])); auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); auto vr_vec_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getIntNTy(vec_elt_num_bits), 128 / vec_elt_num_bits)->getPointerTo()); return m_ir_builder->CreateAlignedLoad(vr_vec_ptr, 16); } -Value * PPULLVMRecompiler::GetVrAsFloatVec(u32 vr) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, VPR[vr])); +Value * Compiler::GetVrAsFloatVec(u32 vr) { + auto vr_i8_ptr = 
m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, VPR[vr])); auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); auto vr_v4f32_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getFloatTy(), 4)->getPointerTo()); return m_ir_builder->CreateAlignedLoad(vr_v4f32_ptr, 16); } -Value * PPULLVMRecompiler::GetVrAsDoubleVec(u32 vr) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, VPR[vr])); +Value * Compiler::GetVrAsDoubleVec(u32 vr) { + auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, VPR[vr])); auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); auto vr_v2f64_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getDoubleTy(), 2)->getPointerTo()); return m_ir_builder->CreateAlignedLoad(vr_v2f64_ptr, 16); } -void PPULLVMRecompiler::SetVr(u32 vr, Value * val_x128) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, VPR[vr])); +void Compiler::SetVr(u32 vr, Value * val_x128) { + auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, VPR[vr])); auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); auto val_i128 = m_ir_builder->CreateBitCast(val_x128, m_ir_builder->getIntNTy(128)); m_ir_builder->CreateAlignedStore(val_i128, vr_i128_ptr, 16); } -Value * PPULLVMRecompiler::CheckBranchCondition(u32 bo, u32 bi) { +Value * Compiler::CheckBranchCondition(u32 bo, u32 bi) { bool bo0 = bo & 0x10 ? true : false; bool bo1 = bo & 0x08 ? true : false; bool bo2 = bo & 0x04 ? 
true : false; @@ -4659,54 +4502,49 @@ Value * PPULLVMRecompiler::CheckBranchCondition(u32 bo, u32 bi) { return cmp_i1; } -void PPULLVMRecompiler::CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i64, bool lk) { +void Compiler::CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i64, bool lk, bool target_is_lr) { if (lk) { SetLr(m_ir_builder->getInt64(m_current_instruction_address + 4)); } - auto current_block = m_ir_builder->GetInsertBlock(); + auto current_block = m_ir_builder->GetInsertBlock(); + BasicBlock * target_block = nullptr; if (dyn_cast(target_i64)) { // Target address is an immediate value. u32 target_address = (u32)(dyn_cast(target_i64)->getLimitedValue()); - target_block = GetBlockInFunction(target_address, m_current_function); + target_block = GetBasicBlockFromAddress(target_address, m_current_function); if (!target_block) { - target_block = GetBlockInFunction(target_address, m_current_function, true); - if ((m_hit_blocks.find(target_address) != m_hit_blocks.end() || !cmp_i1) && m_num_instructions < 300) { - // Target block has either been hit or this is an unconditional branch. 
- m_current_function_uncompiled_blocks_list.push_back(target_address); - m_hit_blocks.insert(target_address); - } else { - // Target block has not been encountered yet and this is not an unconditional branch - m_ir_builder->SetInsertPoint(target_block); - SetPc(target_i64); - m_ir_builder->CreateRetVoid(); - m_current_function_unhit_blocks_list.push_back(target_address); - } + target_block = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function); + m_ir_builder->SetInsertPoint(target_block); + SetPc(target_i64); + m_ir_builder->CreateBr(GetBasicBlockFromAddress(0xFFFFFFFF, m_current_function, true)); } } else { - // Target addres is in a register + // Target address is in a register target_block = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function); m_ir_builder->SetInsertPoint(target_block); SetPc(target_i64); - m_ir_builder->CreateRetVoid(); + + if (target_is_lr && !lk) { + // Return from function call + m_ir_builder->CreateRetVoid(); + } else { + auto switch_instr = m_ir_builder->CreateSwitch(target_i64, GetBasicBlockFromAddress(0xFFFFFFFF, m_current_function, true)); + for (auto i = m_current_block_next_blocks->begin(); i != m_current_block_next_blocks->end(); i++) { + switch_instr->addCase(m_ir_builder->getInt32(i->address), GetBasicBlockFromAddress(i->address, m_current_function)); + } + } } if (cmp_i1) { // Conditional branch - auto next_block = GetBlockInFunction(m_current_instruction_address + 4, m_current_function); + auto next_block = GetBasicBlockFromAddress(m_current_instruction_address + 4, m_current_function); if (!next_block) { - next_block = GetBlockInFunction(m_current_instruction_address + 4, m_current_function, true); - if (m_hit_blocks.find(m_current_instruction_address + 4) != m_hit_blocks.end() && m_num_instructions < 300) { - // Next block has already been hit. 
- m_current_function_uncompiled_blocks_list.push_back(m_current_instruction_address + 4); - } else { - // Next block has not been encountered yet - m_ir_builder->SetInsertPoint(next_block); - SetPc(m_ir_builder->getInt32(m_current_instruction_address + 4)); - m_ir_builder->CreateRetVoid(); - m_current_function_unhit_blocks_list.push_back(m_current_instruction_address + 4); - } + next_block = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function); + m_ir_builder->SetInsertPoint(next_block); + SetPc(m_ir_builder->getInt32(m_current_instruction_address + 4)); + m_ir_builder->CreateBr(GetBasicBlockFromAddress(0xFFFFFFFF, m_current_function, true)); } m_ir_builder->SetInsertPoint(current_block); @@ -4720,7 +4558,7 @@ void PPULLVMRecompiler::CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_ m_hit_branch_instruction = true; } -Value * PPULLVMRecompiler::ReadMemory(Value * addr_i64, u32 bits, u32 alignment, bool bswap, bool could_be_mmio) { +Value * Compiler::ReadMemory(Value * addr_i64, u32 bits, u32 alignment, bool bswap, bool could_be_mmio) { if (bits != 32 || could_be_mmio == false) { auto eaddr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); auto eaddr_ix_ptr = m_ir_builder->CreateIntToPtr(eaddr_i64, m_ir_builder->getIntNTy(bits)->getPointerTo()); @@ -4760,7 +4598,7 @@ Value * PPULLVMRecompiler::ReadMemory(Value * addr_i64, u32 bits, u32 alignment, m_ir_builder->CreateBr(merge_bb); m_ir_builder->SetInsertPoint(else_bb); - auto val_else_i32 = Call("vm_read32", (u32(*)(u64))vm::read32, addr_i64); + auto val_else_i32 = Call("vm.read32", (u32(*)(u64))vm::read32, addr_i64); if (!bswap) { val_else_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt32Ty()), val_else_i32); } @@ -4774,7 +4612,7 @@ Value * PPULLVMRecompiler::ReadMemory(Value * addr_i64, u32 bits, u32 alignment, } } -void PPULLVMRecompiler::WriteMemory(Value * addr_i64, Value * val_ix, u32 
alignment, bool bswap, bool could_be_mmio) { +void Compiler::WriteMemory(Value * addr_i64, Value * val_ix, u32 alignment, bool bswap, bool could_be_mmio) { addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFF); if (val_ix->getType()->getIntegerBitWidth() != 32 || could_be_mmio == false) { if (val_ix->getType()->getIntegerBitWidth() > 8 && bswap) { @@ -4820,7 +4658,7 @@ void PPULLVMRecompiler::WriteMemory(Value * addr_i64, Value * val_ix, u32 alignm val_else_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt32Ty()), val_else_i32); } - Call("vm_write32", (void(*)(u64, u32))vm::write32, addr_i64, val_else_i32); + Call("vm.write32", (void(*)(u64, u32))vm::write32, addr_i64, val_else_i32); m_ir_builder->CreateBr(merge_bb); m_ir_builder->SetInsertPoint(merge_bb); @@ -4828,19 +4666,19 @@ void PPULLVMRecompiler::WriteMemory(Value * addr_i64, Value * val_ix, u32 alignm } template -Value * PPULLVMRecompiler::InterpreterCall(const char * name, Func function, Args... args) { - auto i = m_interpreter_fallback_stats.find(name); - if (i == m_interpreter_fallback_stats.end()) { - i = m_interpreter_fallback_stats.insert(m_interpreter_fallback_stats.end(), std::make_pair(name, 0)); +Value * Compiler::InterpreterCall(const char * name, Func function, Args... 
args) { + auto i = m_stats.interpreter_fallback_stats.find(name); + if (i == m_stats.interpreter_fallback_stats.end()) { + i = m_stats.interpreter_fallback_stats.insert(m_stats.interpreter_fallback_stats.end(), std::make_pair(name, 0)); } i->second++; - return Call(name, function, GetInterpreter(), m_ir_builder->getInt32(args)...); + return Call(name, function, GetInterpreterArg(), m_ir_builder->getInt32(args)...); } template -Type * PPULLVMRecompiler::CppToLlvmType() { +Type * Compiler::CppToLlvmType() { if (std::is_void::value) { return m_ir_builder->getVoidTy(); } else if (std::is_same::value || std::is_same::value) { @@ -4865,7 +4703,7 @@ Type * PPULLVMRecompiler::CppToLlvmType() { } template -Value * PPULLVMRecompiler::Call(const char * name, Func function, Args... args) { +Value * Compiler::Call(const char * name, Func function, Args... args) { auto fn = m_module->getFunction(name); if (!fn) { std::vector fn_args_type = {args->getType()...}; @@ -4879,7 +4717,22 @@ Value * PPULLVMRecompiler::Call(const char * name, Func function, Args... args) return m_ir_builder->CreateCall(fn, fn_args); } -void PPULLVMRecompiler::InitRotateMask() { +bool Compiler::IsBranchInstruction(u32 instruction) { + bool is_branch = false; + u32 field1 = instruction >> 26; + if (field1 == 16 || field1 == 18) { + is_branch = true; + } else if (field1 == 19) { + u32 field2 = (instruction >> 1) & 0x3FF; + if (field2 == 16 || field2 == 528) { + is_branch = true; + } + } + + return is_branch; +} + +void Compiler::InitRotateMask() { for (u32 mb = 0; mb < 64; mb++) { for (u32 me = 0; me < 64; me++) { u64 mask = ((u64)-1 >> mb) ^ ((me >= 63) ? 
0 : (u64)-1 >> (me + 1)); @@ -4888,101 +4741,188 @@ void PPULLVMRecompiler::InitRotateMask() { } } -u32 PPULLVMEmulator::s_num_instances = 0; -std::mutex PPULLVMEmulator::s_recompiler_mutex; -PPULLVMRecompiler * PPULLVMEmulator::s_recompiler = nullptr; +std::mutex RecompilationEngine::s_mutex; +std::shared_ptr RecompilationEngine::s_the_instance; -PPULLVMEmulator::PPULLVMEmulator(PPUThread & ppu) +CompiledCodeFragment RecompilationEngine::GetCompiledCodeFragment(u32 address) { + return nullptr; +} + +void ReleaseCompiledCodeFragment(CompiledCodeFragment compiled_code_fragment) { + +} + +u32 RecompilationEngine::GetCurrentRevision() { + return 0; +} + +std::shared_ptr RecompilationEngine::GetInstance() { + if (s_the_instance == nullptr) { + std::lock_guard lock(s_mutex); + s_the_instance = std::shared_ptr(new RecompilationEngine()); + } + + return s_the_instance; +} + +Tracer::Tracer() { + m_trace.reserve(1000); + m_stack.reserve(100); +} + +Tracer::~Tracer() { + Terminate(); +} + +void Tracer::Trace(BranchType branch_type, u32 address) { + ExecutionTrace * execution_trace = nullptr; + BlockId block_id; + int function; + int start; + + block_id.address = address; + block_id.type = branch_type; + switch (branch_type) { + case FunctionCall: + m_stack.push_back((u32)m_trace.size()); + m_trace.push_back(block_id); + break; + case Block: + function = m_stack.back(); + for (int i = (int)m_trace.size() - 1; i >= function; i--) { + if (m_trace[i].address == address) { + // Found a loop within the current function + execution_trace = new ExecutionTrace(); + execution_trace->type = ExecutionTrace::Loop; + execution_trace->function_address = m_trace[function].address; + execution_trace->blocks.insert(execution_trace->blocks.begin(), m_trace.begin() + i, m_trace.end()); + m_trace.erase(m_trace.begin() + i + 1, m_trace.end()); + break; + } + } + + if (!execution_trace) { + // A loop was not found + m_trace.push_back(block_id); + } + break; + case Return: + function = 
m_stack.back(); + m_stack.pop_back(); + + start = function; + + execution_trace = new ExecutionTrace(); + execution_trace->function_address = m_trace[function].address; + execution_trace->type = ExecutionTrace::Linear; + execution_trace->blocks.insert(execution_trace->blocks.begin(), m_trace.begin() + start, m_trace.end()); + m_trace.erase(m_trace.begin() + start + 1, m_trace.end()); + break; + case None: + break; + default: + assert(0); + break; + } + + if (execution_trace) { + auto s = fmt::Format("Trace: 0x%08X, %s -> ", execution_trace->function_address, execution_trace->type == ExecutionTrace::Loop ? "Loop" : "Linear"); + for (auto i = 0; i < execution_trace->blocks.size(); i++) { + s += fmt::Format("0x%08X ", execution_trace->blocks[i]); + } + + LOG_NOTICE(PPU, s.c_str()); + delete execution_trace; + // TODO: Notify recompilation engine + } +} + +void Tracer::Terminate() { + // TODO: Notify recompilation engine +} + +ppu_recompiler_llvm::ExecutionEngine::ExecutionEngine(PPUThread & ppu) : m_ppu(ppu) , m_interpreter(new PPUInterpreter(ppu)) , m_decoder(m_interpreter) - , m_last_instr_was_branch(true) + , m_last_branch_type(FunctionCall) , m_last_cache_clear_time(std::chrono::high_resolution_clock::now()) - , m_recompiler_revision(0) { - std::lock_guard lock(s_recompiler_mutex); + , m_recompiler_revision(0) + , m_recompilation_engine(RecompilationEngine::GetInstance()) { +} - s_num_instances++; - if (!s_recompiler) { - s_recompiler = new PPULLVMRecompiler(); - s_recompiler->RunAllTests(&m_ppu, m_interpreter); +ppu_recompiler_llvm::ExecutionEngine::~ExecutionEngine() { + for (auto iter = m_address_to_compiled_code_fragment.begin(); iter != m_address_to_compiled_code_fragment.end(); iter++) { + m_recompilation_engine->ReleaseCompiledCodeFragment(iter->second.first); } } -PPULLVMEmulator::~PPULLVMEmulator() { - for (auto iter = m_address_to_executable.begin(); iter != m_address_to_executable.end(); iter++) { - s_recompiler->ReleaseExecutable(iter->first, 
iter->second.revision); - } - - std::lock_guard lock(s_recompiler_mutex); - - s_num_instances--; - if (s_recompiler && s_num_instances == 0) { - delete s_recompiler; - s_recompiler = nullptr; - } -} - -u8 PPULLVMEmulator::DecodeMemory(const u32 address) { +u8 ppu_recompiler_llvm::ExecutionEngine::DecodeMemory(const u32 address) { auto now = std::chrono::high_resolution_clock::now(); - if (std::chrono::duration_cast(now - m_last_cache_clear_time).count() > 1000) { - bool clear_all = false; + if (std::chrono::duration_cast(now - m_last_cache_clear_time).count() > 10000) { + bool clear_all = false; - u32 revision = s_recompiler->GetCurrentRevision(); + u32 revision = m_recompilation_engine->GetCurrentRevision(); if (m_recompiler_revision != revision) { m_recompiler_revision = revision; clear_all = true; } - for (auto iter = m_address_to_executable.begin(); iter != m_address_to_executable.end();) { - auto tmp = iter; - iter++; - if (tmp->second.num_hits == 0 || clear_all) { - m_address_to_executable.erase(tmp); - s_recompiler->ReleaseExecutable(tmp->first, tmp->second.revision); + for (auto i = m_address_to_compiled_code_fragment.begin(); i != m_address_to_compiled_code_fragment.end();) { + auto tmp = i; + i++; + if (tmp->second.second == 0 || clear_all) { + m_address_to_compiled_code_fragment.erase(tmp); + m_recompilation_engine->ReleaseCompiledCodeFragment(tmp->second.first); } else { - tmp->second.num_hits = 0; + tmp->second.second = 0; } } m_last_cache_clear_time = now; } - auto address_to_executable_iter = m_address_to_executable.find(address); - if (address_to_executable_iter == m_address_to_executable.end()) { - auto executable_and_revision = s_recompiler->GetExecutable(address); - if (executable_and_revision.first) { - ExecutableInfo executable_info; - executable_info.executable = executable_and_revision.first; - executable_info.revision = executable_and_revision.second; - executable_info.num_hits = 0; - - address_to_executable_iter = 
m_address_to_executable.insert(m_address_to_executable.end(), std::make_pair(address, executable_info)); - m_uncompiled.erase(address); - } else { - if (m_last_instr_was_branch) { - auto uncompiled_iter = m_uncompiled.find(address); - if (uncompiled_iter != m_uncompiled.end()) { - uncompiled_iter->second++; - if ((uncompiled_iter->second % 1000) == 0) { - s_recompiler->RequestCompilation(address); - } - } else { - m_uncompiled[address] = 0; - } - } + auto i = m_address_to_compiled_code_fragment.find(address); + if (i == m_address_to_compiled_code_fragment.end()) { + auto compiled_code_fragment = m_recompilation_engine->GetCompiledCodeFragment(address); + if (compiled_code_fragment) { + i = m_address_to_compiled_code_fragment.insert(m_address_to_compiled_code_fragment.end(), std::make_pair(address, std::make_pair(compiled_code_fragment, 0))); } } u8 ret = 0; - if (address_to_executable_iter != m_address_to_executable.end()) { - address_to_executable_iter->second.executable(&m_ppu, m_interpreter); - address_to_executable_iter->second.num_hits++; - m_last_instr_was_branch = true; + if (i != m_address_to_compiled_code_fragment.end()) { + m_last_branch_type = None; + i->second.second++; + i->second.first(&m_ppu, m_interpreter); } else { - ret = m_decoder.DecodeMemory(address); - m_last_instr_was_branch = m_ppu.m_is_branch; + if (m_last_branch_type != None) { + m_tracer.Trace(m_last_branch_type, address); + } + + ret = m_decoder.DecodeMemory(address); + m_last_branch_type = m_ppu.m_is_branch ? GetBranchTypeFromInstruction(vm::read32(address)) : None; } return ret; } + +BranchType GetBranchTypeFromInstruction(u32 instruction) { + auto type = BranchType::None; + auto field1 = instruction >> 26; + auto lk = instruction & 1; + + if (field1 == 16 || field1 == 18) { + type = lk ? FunctionCall : Block; + } else if (field1 == 19) { + u32 field2 = (instruction >> 1) & 0x3FF; + if (field2 == 16) { + type = lk ? FunctionCall : Return; + } else if (field2 == 528) { + type = lk ? 
FunctionCall : Block; + } + } + + return type; +} diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h index 218dad9721..a93da8021b 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.h +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h @@ -11,779 +11,822 @@ #include "llvm/ExecutionEngine/JIT.h" #include "llvm/PassManager.h" -struct PPUState; - -/// PPU recompiler that uses LLVM for code generation and optimization -class PPULLVMRecompiler : public ThreadBase, protected PPUOpcodes, protected PPCDecoder { -public: - typedef void(*Executable)(PPUThread * ppu_state, PPUInterpreter * interpreter); - - PPULLVMRecompiler(); - - PPULLVMRecompiler(const PPULLVMRecompiler & other) = delete; - PPULLVMRecompiler(PPULLVMRecompiler && other) = delete; - - virtual ~PPULLVMRecompiler(); - - PPULLVMRecompiler & operator = (const PPULLVMRecompiler & other) = delete; - PPULLVMRecompiler & operator = (PPULLVMRecompiler && other) = delete; - - /// Get the executable for the code starting at address - std::pair GetExecutable(u32 address); - - /// Release an executable earlier obtained through GetExecutable - void ReleaseExecutable(u32 address, u32 revision); - - /// Request the code at the sepcified address to be compiled - void RequestCompilation(u32 address); - - /// Get the current revision - u32 GetCurrentRevision(); - - /// Execute all tests - void RunAllTests(PPUThread * ppu_state, PPUInterpreter * interpreter); - - void Task() override; - -protected: - void Decode(const u32 code) override; - - void NULL_OP() override; - void NOP() override; - - void TDI(u32 to, u32 ra, s32 simm16) override; - void TWI(u32 to, u32 ra, s32 simm16) override; - - void MFVSCR(u32 vd) override; - void MTVSCR(u32 vb) override; - void VADDCUW(u32 vd, u32 va, u32 vb) override; - void VADDFP(u32 vd, u32 va, u32 vb) override; - void VADDSBS(u32 vd, u32 va, u32 vb) override; - void VADDSHS(u32 vd, u32 va, u32 vb) override; - void VADDSWS(u32 vd, u32 va, u32 vb) override; - void VADDUBM(u32 
vd, u32 va, u32 vb) override; - void VADDUBS(u32 vd, u32 va, u32 vb) override; - void VADDUHM(u32 vd, u32 va, u32 vb) override; - void VADDUHS(u32 vd, u32 va, u32 vb) override; - void VADDUWM(u32 vd, u32 va, u32 vb) override; - void VADDUWS(u32 vd, u32 va, u32 vb) override; - void VAND(u32 vd, u32 va, u32 vb) override; - void VANDC(u32 vd, u32 va, u32 vb) override; - void VAVGSB(u32 vd, u32 va, u32 vb) override; - void VAVGSH(u32 vd, u32 va, u32 vb) override; - void VAVGSW(u32 vd, u32 va, u32 vb) override; - void VAVGUB(u32 vd, u32 va, u32 vb) override; - void VAVGUH(u32 vd, u32 va, u32 vb) override; - void VAVGUW(u32 vd, u32 va, u32 vb) override; - void VCFSX(u32 vd, u32 uimm5, u32 vb) override; - void VCFUX(u32 vd, u32 uimm5, u32 vb) override; - void VCMPBFP(u32 vd, u32 va, u32 vb) override; - void VCMPBFP_(u32 vd, u32 va, u32 vb) override; - void VCMPEQFP(u32 vd, u32 va, u32 vb) override; - void VCMPEQFP_(u32 vd, u32 va, u32 vb) override; - void VCMPEQUB(u32 vd, u32 va, u32 vb) override; - void VCMPEQUB_(u32 vd, u32 va, u32 vb) override; - void VCMPEQUH(u32 vd, u32 va, u32 vb) override; - void VCMPEQUH_(u32 vd, u32 va, u32 vb) override; - void VCMPEQUW(u32 vd, u32 va, u32 vb) override; - void VCMPEQUW_(u32 vd, u32 va, u32 vb) override; - void VCMPGEFP(u32 vd, u32 va, u32 vb) override; - void VCMPGEFP_(u32 vd, u32 va, u32 vb) override; - void VCMPGTFP(u32 vd, u32 va, u32 vb) override; - void VCMPGTFP_(u32 vd, u32 va, u32 vb) override; - void VCMPGTSB(u32 vd, u32 va, u32 vb) override; - void VCMPGTSB_(u32 vd, u32 va, u32 vb) override; - void VCMPGTSH(u32 vd, u32 va, u32 vb) override; - void VCMPGTSH_(u32 vd, u32 va, u32 vb) override; - void VCMPGTSW(u32 vd, u32 va, u32 vb) override; - void VCMPGTSW_(u32 vd, u32 va, u32 vb) override; - void VCMPGTUB(u32 vd, u32 va, u32 vb) override; - void VCMPGTUB_(u32 vd, u32 va, u32 vb) override; - void VCMPGTUH(u32 vd, u32 va, u32 vb) override; - void VCMPGTUH_(u32 vd, u32 va, u32 vb) override; - void VCMPGTUW(u32 vd, u32 va, 
u32 vb) override; - void VCMPGTUW_(u32 vd, u32 va, u32 vb) override; - void VCTSXS(u32 vd, u32 uimm5, u32 vb) override; - void VCTUXS(u32 vd, u32 uimm5, u32 vb) override; - void VEXPTEFP(u32 vd, u32 vb) override; - void VLOGEFP(u32 vd, u32 vb) override; - void VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) override; - void VMAXFP(u32 vd, u32 va, u32 vb) override; - void VMAXSB(u32 vd, u32 va, u32 vb) override; - void VMAXSH(u32 vd, u32 va, u32 vb) override; - void VMAXSW(u32 vd, u32 va, u32 vb) override; - void VMAXUB(u32 vd, u32 va, u32 vb) override; - void VMAXUH(u32 vd, u32 va, u32 vb) override; - void VMAXUW(u32 vd, u32 va, u32 vb) override; - void VMHADDSHS(u32 vd, u32 va, u32 vb, u32 vc) override; - void VMHRADDSHS(u32 vd, u32 va, u32 vb, u32 vc) override; - void VMINFP(u32 vd, u32 va, u32 vb) override; - void VMINSB(u32 vd, u32 va, u32 vb) override; - void VMINSH(u32 vd, u32 va, u32 vb) override; - void VMINSW(u32 vd, u32 va, u32 vb) override; - void VMINUB(u32 vd, u32 va, u32 vb) override; - void VMINUH(u32 vd, u32 va, u32 vb) override; - void VMINUW(u32 vd, u32 va, u32 vb) override; - void VMLADDUHM(u32 vd, u32 va, u32 vb, u32 vc) override; - void VMRGHB(u32 vd, u32 va, u32 vb) override; - void VMRGHH(u32 vd, u32 va, u32 vb) override; - void VMRGHW(u32 vd, u32 va, u32 vb) override; - void VMRGLB(u32 vd, u32 va, u32 vb) override; - void VMRGLH(u32 vd, u32 va, u32 vb) override; - void VMRGLW(u32 vd, u32 va, u32 vb) override; - void VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) override; - void VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) override; - void VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) override; - void VMSUMUBM(u32 vd, u32 va, u32 vb, u32 vc) override; - void VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) override; - void VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) override; - void VMULESB(u32 vd, u32 va, u32 vb) override; - void VMULESH(u32 vd, u32 va, u32 vb) override; - void VMULEUB(u32 vd, u32 va, u32 vb) override; - void VMULEUH(u32 vd, u32 va, u32 vb) override; - void 
VMULOSB(u32 vd, u32 va, u32 vb) override; - void VMULOSH(u32 vd, u32 va, u32 vb) override; - void VMULOUB(u32 vd, u32 va, u32 vb) override; - void VMULOUH(u32 vd, u32 va, u32 vb) override; - void VNMSUBFP(u32 vd, u32 va, u32 vc, u32 vb) override; - void VNOR(u32 vd, u32 va, u32 vb) override; - void VOR(u32 vd, u32 va, u32 vb) override; - void VPERM(u32 vd, u32 va, u32 vb, u32 vc) override; - void VPKPX(u32 vd, u32 va, u32 vb) override; - void VPKSHSS(u32 vd, u32 va, u32 vb) override; - void VPKSHUS(u32 vd, u32 va, u32 vb) override; - void VPKSWSS(u32 vd, u32 va, u32 vb) override; - void VPKSWUS(u32 vd, u32 va, u32 vb) override; - void VPKUHUM(u32 vd, u32 va, u32 vb) override; - void VPKUHUS(u32 vd, u32 va, u32 vb) override; - void VPKUWUM(u32 vd, u32 va, u32 vb) override; - void VPKUWUS(u32 vd, u32 va, u32 vb) override; - void VREFP(u32 vd, u32 vb) override; - void VRFIM(u32 vd, u32 vb) override; - void VRFIN(u32 vd, u32 vb) override; - void VRFIP(u32 vd, u32 vb) override; - void VRFIZ(u32 vd, u32 vb) override; - void VRLB(u32 vd, u32 va, u32 vb) override; - void VRLH(u32 vd, u32 va, u32 vb) override; - void VRLW(u32 vd, u32 va, u32 vb) override; - void VRSQRTEFP(u32 vd, u32 vb) override; - void VSEL(u32 vd, u32 va, u32 vb, u32 vc) override; - void VSL(u32 vd, u32 va, u32 vb) override; - void VSLB(u32 vd, u32 va, u32 vb) override; - void VSLDOI(u32 vd, u32 va, u32 vb, u32 sh) override; - void VSLH(u32 vd, u32 va, u32 vb) override; - void VSLO(u32 vd, u32 va, u32 vb) override; - void VSLW(u32 vd, u32 va, u32 vb) override; - void VSPLTB(u32 vd, u32 uimm5, u32 vb) override; - void VSPLTH(u32 vd, u32 uimm5, u32 vb) override; - void VSPLTISB(u32 vd, s32 simm5) override; - void VSPLTISH(u32 vd, s32 simm5) override; - void VSPLTISW(u32 vd, s32 simm5) override; - void VSPLTW(u32 vd, u32 uimm5, u32 vb) override; - void VSR(u32 vd, u32 va, u32 vb) override; - void VSRAB(u32 vd, u32 va, u32 vb) override; - void VSRAH(u32 vd, u32 va, u32 vb) override; - void VSRAW(u32 vd, u32 
va, u32 vb) override; - void VSRB(u32 vd, u32 va, u32 vb) override; - void VSRH(u32 vd, u32 va, u32 vb) override; - void VSRO(u32 vd, u32 va, u32 vb) override; - void VSRW(u32 vd, u32 va, u32 vb) override; - void VSUBCUW(u32 vd, u32 va, u32 vb) override; - void VSUBFP(u32 vd, u32 va, u32 vb) override; - void VSUBSBS(u32 vd, u32 va, u32 vb) override; - void VSUBSHS(u32 vd, u32 va, u32 vb) override; - void VSUBSWS(u32 vd, u32 va, u32 vb) override; - void VSUBUBM(u32 vd, u32 va, u32 vb) override; - void VSUBUBS(u32 vd, u32 va, u32 vb) override; - void VSUBUHM(u32 vd, u32 va, u32 vb) override; - void VSUBUHS(u32 vd, u32 va, u32 vb) override; - void VSUBUWM(u32 vd, u32 va, u32 vb) override; - void VSUBUWS(u32 vd, u32 va, u32 vb) override; - void VSUMSWS(u32 vd, u32 va, u32 vb) override; - void VSUM2SWS(u32 vd, u32 va, u32 vb) override; - void VSUM4SBS(u32 vd, u32 va, u32 vb) override; - void VSUM4SHS(u32 vd, u32 va, u32 vb) override; - void VSUM4UBS(u32 vd, u32 va, u32 vb) override; - void VUPKHPX(u32 vd, u32 vb) override; - void VUPKHSB(u32 vd, u32 vb) override; - void VUPKHSH(u32 vd, u32 vb) override; - void VUPKLPX(u32 vd, u32 vb) override; - void VUPKLSB(u32 vd, u32 vb) override; - void VUPKLSH(u32 vd, u32 vb) override; - void VXOR(u32 vd, u32 va, u32 vb) override; - void MULLI(u32 rd, u32 ra, s32 simm16) override; - void SUBFIC(u32 rd, u32 ra, s32 simm16) override; - void CMPLI(u32 bf, u32 l, u32 ra, u32 uimm16) override; - void CMPI(u32 bf, u32 l, u32 ra, s32 simm16) override; - void ADDIC(u32 rd, u32 ra, s32 simm16) override; - void ADDIC_(u32 rd, u32 ra, s32 simm16) override; - void ADDI(u32 rd, u32 ra, s32 simm16) override; - void ADDIS(u32 rd, u32 ra, s32 simm16) override; - void BC(u32 bo, u32 bi, s32 bd, u32 aa, u32 lk) override; - void SC(u32 sc_code) override; - void B(s32 ll, u32 aa, u32 lk) override; - void MCRF(u32 crfd, u32 crfs) override; - void BCLR(u32 bo, u32 bi, u32 bh, u32 lk) override; - void CRNOR(u32 bt, u32 ba, u32 bb) override; - void 
CRANDC(u32 bt, u32 ba, u32 bb) override; - void ISYNC() override; - void CRXOR(u32 bt, u32 ba, u32 bb) override; - void CRNAND(u32 bt, u32 ba, u32 bb) override; - void CRAND(u32 bt, u32 ba, u32 bb) override; - void CREQV(u32 bt, u32 ba, u32 bb) override; - void CRORC(u32 bt, u32 ba, u32 bb) override; - void CROR(u32 bt, u32 ba, u32 bb) override; - void BCCTR(u32 bo, u32 bi, u32 bh, u32 lk) override; - void RLWIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) override; - void RLWINM(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) override; - void RLWNM(u32 ra, u32 rs, u32 rb, u32 MB, u32 ME, bool rc) override; - void ORI(u32 rs, u32 ra, u32 uimm16) override; - void ORIS(u32 rs, u32 ra, u32 uimm16) override; - void XORI(u32 ra, u32 rs, u32 uimm16) override; - void XORIS(u32 ra, u32 rs, u32 uimm16) override; - void ANDI_(u32 ra, u32 rs, u32 uimm16) override; - void ANDIS_(u32 ra, u32 rs, u32 uimm16) override; - void RLDICL(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) override; - void RLDICR(u32 ra, u32 rs, u32 sh, u32 me, bool rc) override; - void RLDIC(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) override; - void RLDIMI(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) override; - void RLDC_LR(u32 ra, u32 rs, u32 rb, u32 m_eb, bool is_r, bool rc) override; - void CMP(u32 crfd, u32 l, u32 ra, u32 rb) override; - void TW(u32 to, u32 ra, u32 rb) override; - void LVSL(u32 vd, u32 ra, u32 rb) override; - void LVEBX(u32 vd, u32 ra, u32 rb) override; - void SUBFC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; - void MULHDU(u32 rd, u32 ra, u32 rb, bool rc) override; - void ADDC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; - void MULHWU(u32 rd, u32 ra, u32 rb, bool rc) override; - void MFOCRF(u32 a, u32 rd, u32 crm) override; - void LWARX(u32 rd, u32 ra, u32 rb) override; - void LDX(u32 ra, u32 rs, u32 rb) override; - void LWZX(u32 rd, u32 ra, u32 rb) override; - void SLW(u32 ra, u32 rs, u32 rb, bool rc) override; - void CNTLZW(u32 ra, u32 rs, bool rc) override; - void SLD(u32 
ra, u32 rs, u32 rb, bool rc) override; - void AND(u32 ra, u32 rs, u32 rb, bool rc) override; - void CMPL(u32 bf, u32 l, u32 ra, u32 rb) override; - void LVSR(u32 vd, u32 ra, u32 rb) override; - void LVEHX(u32 vd, u32 ra, u32 rb) override; - void SUBF(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; - void LDUX(u32 rd, u32 ra, u32 rb) override; - void DCBST(u32 ra, u32 rb) override; - void LWZUX(u32 rd, u32 ra, u32 rb) override; - void CNTLZD(u32 ra, u32 rs, bool rc) override; - void ANDC(u32 ra, u32 rs, u32 rb, bool rc) override; - void TD(u32 to, u32 ra, u32 rb) override; - void LVEWX(u32 vd, u32 ra, u32 rb) override; - void MULHD(u32 rd, u32 ra, u32 rb, bool rc) override; - void MULHW(u32 rd, u32 ra, u32 rb, bool rc) override; - void LDARX(u32 rd, u32 ra, u32 rb) override; - void DCBF(u32 ra, u32 rb) override; - void LBZX(u32 rd, u32 ra, u32 rb) override; - void LVX(u32 vd, u32 ra, u32 rb) override; - void NEG(u32 rd, u32 ra, u32 oe, bool rc) override; - void LBZUX(u32 rd, u32 ra, u32 rb) override; - void NOR(u32 ra, u32 rs, u32 rb, bool rc) override; - void STVEBX(u32 vs, u32 ra, u32 rb) override; - void SUBFE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; - void ADDE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; - void MTOCRF(u32 l, u32 crm, u32 rs) override; - void STDX(u32 rs, u32 ra, u32 rb) override; - void STWCX_(u32 rs, u32 ra, u32 rb) override; - void STWX(u32 rs, u32 ra, u32 rb) override; - void STVEHX(u32 vs, u32 ra, u32 rb) override; - void STDUX(u32 rs, u32 ra, u32 rb) override; - void STWUX(u32 rs, u32 ra, u32 rb) override; - void STVEWX(u32 vs, u32 ra, u32 rb) override; - void SUBFZE(u32 rd, u32 ra, u32 oe, bool rc) override; - void ADDZE(u32 rd, u32 ra, u32 oe, bool rc) override; - void STDCX_(u32 rs, u32 ra, u32 rb) override; - void STBX(u32 rs, u32 ra, u32 rb) override; - void STVX(u32 vs, u32 ra, u32 rb) override; - void MULLD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; - void SUBFME(u32 rd, u32 ra, u32 oe, bool rc) override; 
- void ADDME(u32 rd, u32 ra, u32 oe, bool rc) override; - void MULLW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; - void DCBTST(u32 ra, u32 rb, u32 th) override; - void STBUX(u32 rs, u32 ra, u32 rb) override; - void ADD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; - void DCBT(u32 ra, u32 rb, u32 th) override; - void LHZX(u32 rd, u32 ra, u32 rb) override; - void EQV(u32 ra, u32 rs, u32 rb, bool rc) override; - void ECIWX(u32 rd, u32 ra, u32 rb) override; - void LHZUX(u32 rd, u32 ra, u32 rb) override; - void XOR(u32 rs, u32 ra, u32 rb, bool rc) override; - void MFSPR(u32 rd, u32 spr) override; - void LWAX(u32 rd, u32 ra, u32 rb) override; - void DST(u32 ra, u32 rb, u32 strm, u32 t) override; - void LHAX(u32 rd, u32 ra, u32 rb) override; - void LVXL(u32 vd, u32 ra, u32 rb) override; - void MFTB(u32 rd, u32 spr) override; - void LWAUX(u32 rd, u32 ra, u32 rb) override; - void DSTST(u32 ra, u32 rb, u32 strm, u32 t) override; - void LHAUX(u32 rd, u32 ra, u32 rb) override; - void STHX(u32 rs, u32 ra, u32 rb) override; - void ORC(u32 rs, u32 ra, u32 rb, bool rc) override; - void ECOWX(u32 rs, u32 ra, u32 rb) override; - void STHUX(u32 rs, u32 ra, u32 rb) override; - void OR(u32 ra, u32 rs, u32 rb, bool rc) override; - void DIVDU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; - void DIVWU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; - void MTSPR(u32 spr, u32 rs) override; - //DCBI - void NAND(u32 ra, u32 rs, u32 rb, bool rc) override; - void STVXL(u32 vs, u32 ra, u32 rb) override; - void DIVD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; - void DIVW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; - void LVLX(u32 vd, u32 ra, u32 rb) override; - void LDBRX(u32 rd, u32 ra, u32 rb) override; - void LSWX(u32 rd, u32 ra, u32 rb) override; - void LWBRX(u32 rd, u32 ra, u32 rb) override; - void LFSX(u32 frd, u32 ra, u32 rb) override; - void SRW(u32 ra, u32 rs, u32 rb, bool rc) override; - void SRD(u32 ra, u32 rs, u32 rb, bool rc) override; - void 
LVRX(u32 vd, u32 ra, u32 rb) override; - void LSWI(u32 rd, u32 ra, u32 nb) override; - void LFSUX(u32 frd, u32 ra, u32 rb) override; - void SYNC(u32 l) override; - void LFDX(u32 frd, u32 ra, u32 rb) override; - void LFDUX(u32 frd, u32 ra, u32 rb) override; - void STVLX(u32 vs, u32 ra, u32 rb) override; - void STSWX(u32 rs, u32 ra, u32 rb) override; - void STWBRX(u32 rs, u32 ra, u32 rb) override; - void STFSX(u32 frs, u32 ra, u32 rb) override; - void STVRX(u32 vs, u32 ra, u32 rb) override; - void STFSUX(u32 frs, u32 ra, u32 rb) override; - void STSWI(u32 rd, u32 ra, u32 nb) override; - void STFDX(u32 frs, u32 ra, u32 rb) override; - void STFDUX(u32 frs, u32 ra, u32 rb) override; - void LVLXL(u32 vd, u32 ra, u32 rb) override; - void LHBRX(u32 rd, u32 ra, u32 rb) override; - void SRAW(u32 ra, u32 rs, u32 rb, bool rc) override; - void SRAD(u32 ra, u32 rs, u32 rb, bool rc) override; - void LVRXL(u32 vd, u32 ra, u32 rb) override; - void DSS(u32 strm, u32 a) override; - void SRAWI(u32 ra, u32 rs, u32 sh, bool rc) override; - void SRADI1(u32 ra, u32 rs, u32 sh, bool rc) override; - void SRADI2(u32 ra, u32 rs, u32 sh, bool rc) override; - void EIEIO() override; - void STVLXL(u32 vs, u32 ra, u32 rb) override; - void STHBRX(u32 rs, u32 ra, u32 rb) override; - void EXTSH(u32 ra, u32 rs, bool rc) override; - void STVRXL(u32 sd, u32 ra, u32 rb) override; - void EXTSB(u32 ra, u32 rs, bool rc) override; - void STFIWX(u32 frs, u32 ra, u32 rb) override; - void EXTSW(u32 ra, u32 rs, bool rc) override; - void ICBI(u32 ra, u32 rb) override; - void DCBZ(u32 ra, u32 rb) override; - void LWZ(u32 rd, u32 ra, s32 d) override; - void LWZU(u32 rd, u32 ra, s32 d) override; - void LBZ(u32 rd, u32 ra, s32 d) override; - void LBZU(u32 rd, u32 ra, s32 d) override; - void STW(u32 rs, u32 ra, s32 d) override; - void STWU(u32 rs, u32 ra, s32 d) override; - void STB(u32 rs, u32 ra, s32 d) override; - void STBU(u32 rs, u32 ra, s32 d) override; - void LHZ(u32 rd, u32 ra, s32 d) override; - void LHZU(u32 
rd, u32 ra, s32 d) override; - void LHA(u32 rs, u32 ra, s32 d) override; - void LHAU(u32 rs, u32 ra, s32 d) override; - void STH(u32 rs, u32 ra, s32 d) override; - void STHU(u32 rs, u32 ra, s32 d) override; - void LMW(u32 rd, u32 ra, s32 d) override; - void STMW(u32 rs, u32 ra, s32 d) override; - void LFS(u32 frd, u32 ra, s32 d) override; - void LFSU(u32 frd, u32 ra, s32 d) override; - void LFD(u32 frd, u32 ra, s32 d) override; - void LFDU(u32 frd, u32 ra, s32 d) override; - void STFS(u32 frs, u32 ra, s32 d) override; - void STFSU(u32 frs, u32 ra, s32 d) override; - void STFD(u32 frs, u32 ra, s32 d) override; - void STFDU(u32 frs, u32 ra, s32 d) override; - void LD(u32 rd, u32 ra, s32 ds) override; - void LDU(u32 rd, u32 ra, s32 ds) override; - void LWA(u32 rd, u32 ra, s32 ds) override; - void FDIVS(u32 frd, u32 fra, u32 frb, bool rc) override; - void FSUBS(u32 frd, u32 fra, u32 frb, bool rc) override; - void FADDS(u32 frd, u32 fra, u32 frb, bool rc) override; - void FSQRTS(u32 frd, u32 frb, bool rc) override; - void FRES(u32 frd, u32 frb, bool rc) override; - void FMULS(u32 frd, u32 fra, u32 frc, bool rc) override; - void FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override; - void FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override; - void FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override; - void FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override; - void STD(u32 rs, u32 ra, s32 ds) override; - void STDU(u32 rs, u32 ra, s32 ds) override; - void MTFSB1(u32 bt, bool rc) override; - void MCRFS(u32 bf, u32 bfa) override; - void MTFSB0(u32 bt, bool rc) override; - void MTFSFI(u32 crfd, u32 i, bool rc) override; - void MFFS(u32 frd, bool rc) override; - void MTFSF(u32 flm, u32 frb, bool rc) override; - - void FCMPU(u32 bf, u32 fra, u32 frb) override; - void FRSP(u32 frd, u32 frb, bool rc) override; - void FCTIW(u32 frd, u32 frb, bool rc) override; - void FCTIWZ(u32 frd, u32 frb, bool rc) override; - void FDIV(u32 frd, u32 fra, u32 
frb, bool rc) override; - void FSUB(u32 frd, u32 fra, u32 frb, bool rc) override; - void FADD(u32 frd, u32 fra, u32 frb, bool rc) override; - void FSQRT(u32 frd, u32 frb, bool rc) override; - void FSEL(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override; - void FMUL(u32 frd, u32 fra, u32 frc, bool rc) override; - void FRSQRTE(u32 frd, u32 frb, bool rc) override; - void FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override; - void FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override; - void FNMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override; - void FNMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override; - void FCMPO(u32 crfd, u32 fra, u32 frb) override; - void FNEG(u32 frd, u32 frb, bool rc) override; - void FMR(u32 frd, u32 frb, bool rc) override; - void FNABS(u32 frd, u32 frb, bool rc) override; - void FABS(u32 frd, u32 frb, bool rc) override; - void FCTID(u32 frd, u32 frb, bool rc) override; - void FCTIDZ(u32 frd, u32 frb, bool rc) override; - void FCFID(u32 frd, u32 frb, bool rc) override; - - void UNK(const u32 code, const u32 opcode, const u32 gcode) override; - -private: - struct ExecutableInfo { - /// Pointer to the executable - Executable executable; - - /// Size of the executable - size_t size; - - /// Number of PPU instructions compiled into this executable - u32 num_instructions; - - /// List of blocks that this executable refers to that have not been hit yet - std::list unhit_blocks_list; - - /// LLVM function corresponding to the executable - llvm::Function * llvm_function; +namespace ppu_recompiler_llvm { + /// Branch type + enum BranchType { + None, + FunctionCall, + Block, + Return, }; - /// Lock for accessing m_compiled_shared - // TODO: Use a RW lock - std::mutex m_compiled_shared_lock; + /// Unique id of a block + union BlockId { + u64 block_id; - /// Sections that have been compiled. This data store is shared with the execution threads. - /// Keys are starting address of the section and ~revision. 
Data is pointer to the executable and its reference count. - std::map, std::pair> m_compiled_shared; + struct { + /// Address of the block + u32 address; - /// Lock for accessing m_uncompiled_shared - std::mutex m_uncompiled_shared_lock; - - /// Current revision. This is incremented everytime a section is compiled. - std::atomic m_revision; - - /// Sections that have not been compiled yet. This data store is shared with the execution threads. - std::list m_uncompiled_shared; - - /// Set of all blocks that have been hit - std::set m_hit_blocks; - - /// Sections that have been compiled. Keys are starting address of the section and ~revision. - std::map, ExecutableInfo> m_compiled; - - /// LLVM context - llvm::LLVMContext * m_llvm_context; - - /// LLVM IR builder - llvm::IRBuilder<> * m_ir_builder; - - /// Module to which all generated code is output to - llvm::Module * m_module; - - /// JIT execution engine - llvm::ExecutionEngine * m_execution_engine; - - /// Function pass manager - llvm::FunctionPassManager * m_fpm; - - /// A flag used to detect branch instructions. - /// This is set to false at the start of compilation of a block. - /// When a branch instruction is encountered, this is set to true by the decode function. 
- bool m_hit_branch_instruction; - - /// The function being compiled - llvm::Function * m_current_function; - - /// List of blocks to be compiled in the current function being compiled - std::list m_current_function_uncompiled_blocks_list; - - /// List of blocks that the current function refers to but have not been hit yet - std::list m_current_function_unhit_blocks_list; - - /// Address of the current instruction - u32 m_current_instruction_address; - - /// Number of instructions in this section - u32 m_num_instructions; - - /// Time spent building the LLVM IR - std::chrono::nanoseconds m_ir_build_time; - - /// Time spent optimizing - std::chrono::nanoseconds m_optimizing_time; - - /// Time spent translating LLVM IR to machine code - std::chrono::nanoseconds m_translation_time; - - /// Time spent compiling - std::chrono::nanoseconds m_compilation_time; - - /// Time spent idling - std::chrono::nanoseconds m_idling_time; - - /// Total time - std::chrono::nanoseconds m_total_time; - - /// Contains the number of times the interpreter fallback was used - std::map m_interpreter_fallback_stats; - - /// Get the block in function for the instruction at the specified address. 
- llvm::BasicBlock * GetBlockInFunction(u32 address, llvm::Function * function, bool create_if_not_exist = false); - - /// Compile the section startin at address - void Compile(u32 address); - - /// Remove old versions of executables that are no longer used by any execution thread - void RemoveUnusedOldVersions(); - - /// Test whether the blocks needs to be compiled - bool NeedsCompiling(u32 address); - - /// Get PPU state pointer - llvm::Value * GetPPUState(); - - /// Get interpreter pointer - llvm::Value * GetInterpreter(); - - /// Get a bit - llvm::Value * GetBit(llvm::Value * val, u32 n); - - /// Clear a bit - llvm::Value * ClrBit(llvm::Value * val, u32 n); - - /// Set a bit - llvm::Value * SetBit(llvm::Value * val, u32 n, llvm::Value * bit, bool doClear = true); - - /// Get a nibble - llvm::Value * GetNibble(llvm::Value * val, u32 n); - - /// Clear a nibble - llvm::Value * ClrNibble(llvm::Value * val, u32 n); - - /// Set a nibble - llvm::Value * SetNibble(llvm::Value * val, u32 n, llvm::Value * nibble, bool doClear = true); - - /// Set a nibble - llvm::Value * SetNibble(llvm::Value * val, u32 n, llvm::Value * b0, llvm::Value * b1, llvm::Value * b2, llvm::Value * b3, bool doClear = true); - - /// Load PC - llvm::Value * GetPc(); - - /// Set PC - void SetPc(llvm::Value * val_ix); - - /// Load GPR - llvm::Value * GetGpr(u32 r, u32 num_bits = 64); - - /// Set GPR - void SetGpr(u32 r, llvm::Value * val_x64); - - /// Load CR - llvm::Value * GetCr(); - - /// Load CR and get field CRn - llvm::Value * GetCrField(u32 n); - - /// Set CR - void SetCr(llvm::Value * val_x32); - - /// Set CR field - void SetCrField(u32 n, llvm::Value * field); - - /// Set CR field - void SetCrField(u32 n, llvm::Value * b0, llvm::Value * b1, llvm::Value * b2, llvm::Value * b3); - - /// Set CR field based on signed comparison - void SetCrFieldSignedCmp(u32 n, llvm::Value * a, llvm::Value * b); - - /// Set CR field based on unsigned comparison - void SetCrFieldUnsignedCmp(u32 n, llvm::Value * 
a, llvm::Value * b); - - /// Set CR6 based on the result of the vector compare instruction - void SetCr6AfterVectorCompare(u32 vr); - - /// Get LR - llvm::Value * GetLr(); - - /// Set LR - void SetLr(llvm::Value * val_x64); - - /// Get CTR - llvm::Value * GetCtr(); - - /// Set CTR - void SetCtr(llvm::Value * val_x64); - - /// Load XER and convert it to an i64 - llvm::Value * GetXer(); - - /// Load XER and return the CA bit - llvm::Value * GetXerCa(); - - /// Load XER and return the SO bit - llvm::Value * GetXerSo(); - - /// Set XER - void SetXer(llvm::Value * val_x64); - - /// Set the CA bit of XER - void SetXerCa(llvm::Value * ca); - - /// Set the SO bit of XER - void SetXerSo(llvm::Value * so); - - /// Get USPRG0 - llvm::Value * GetUsprg0(); - - /// Set USPRG0 - void SetUsprg0(llvm::Value * val_x64); - - /// Get FPR - llvm::Value * GetFpr(u32 r, u32 bits = 64, bool as_int = false); - - /// Set FPR - void SetFpr(u32 r, llvm::Value * val); - - /// Load VSCR - llvm::Value * GetVscr(); - - /// Set VSCR - void SetVscr(llvm::Value * val_x32); - - /// Load VR - llvm::Value * GetVr(u32 vr); - - /// Load VR and convert it to an integer vector - llvm::Value * GetVrAsIntVec(u32 vr, u32 vec_elt_num_bits); - - /// Load VR and convert it to a float vector with 4 elements - llvm::Value * GetVrAsFloatVec(u32 vr); - - /// Load VR and convert it to a double vector with 2 elements - llvm::Value * GetVrAsDoubleVec(u32 vr); - - /// Set VR to the specified value - void SetVr(u32 vr, llvm::Value * val_x128); - - /// Check condition for branch instructions - llvm::Value * CheckBranchCondition(u32 bo, u32 bi); - - /// Create IR for a branch instruction - void CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i64, bool lk); - - /// Read from memory - llvm::Value * ReadMemory(llvm::Value * addr_i64, u32 bits, u32 alignment = 0, bool bswap = true, bool could_be_mmio = true); - - /// Write to memory - void WriteMemory(llvm::Value * addr_i64, llvm::Value * val_ix, u32 alignment = 0, 
bool bswap = true, bool could_be_mmio = true); - - /// Call an interpreter function - template - llvm::Value * InterpreterCall(const char * name, Func function, Args... args); - - /// Convert a C++ type to an LLVM type - template - llvm::Type * CppToLlvmType(); - - /// Call a function - template - llvm::Value * Call(const char * name, Func function, Args... args); - - /// Test an instruction against the interpreter - template - void VerifyInstructionAgainstInterpreter(const char * name, PPULLVMRecompilerFn recomp_fn, PPUInterpreterFn interp_fn, PPUState & input_state, Args... args); - - /// Excute a test - void RunTest(const char * name, std::function test_case, std::function input, std::function check_result); - - /// A mask used in rotate instructions - static u64 s_rotate_mask[64][64]; - - /// A flag indicating whether s_rotate_mask has been initialised or not - static bool s_rotate_mask_inited; - - /// Initialse s_rotate_mask - static void InitRotateMask(); -}; - -/// PPU emulator that uses LLVM to convert PPU instructions to host CPU instructions -class PPULLVMEmulator : public CPUDecoder { -public: - PPULLVMEmulator(PPUThread & ppu); - PPULLVMEmulator() = delete; - - PPULLVMEmulator(const PPULLVMEmulator & other) = delete; - PPULLVMEmulator(PPULLVMEmulator && other) = delete; - - virtual ~PPULLVMEmulator(); - - PPULLVMEmulator & operator = (const PPULLVMEmulator & other) = delete; - PPULLVMEmulator & operator = (PPULLVMEmulator && other) = delete; - - u8 DecodeMemory(const u32 address) override; - -private: - struct ExecutableInfo { - /// Pointer to the executable - PPULLVMRecompiler::Executable executable; - - /// The revision of the executable - u32 revision; - - /// Number of times the executable was hit - u32 num_hits; + /// The type of the block + BranchType type; + }; }; - /// PPU processor context - PPUThread & m_ppu; + /// An execution trace. 
+ struct ExecutionTrace { + /// The function in which this trace was found + u32 function_address; - /// PPU Interpreter - PPUInterpreter * m_interpreter; + /// Execution trace type + enum { + Linear, + Loop, + } type; - /// PPU instruction Decoder - PPUDecoder m_decoder; + /// Sequence of blocks encountered in this trace + std::vector blocks; + }; - /// Set to true if the last executed instruction was a branch - bool m_last_instr_was_branch; + /// A fragment of PPU code. A list of (block, list of next blocks) pairs. + typedef std::vector>> CodeFragment; - /// The time at which the m_address_to_executable cache was last cleared - std::chrono::high_resolution_clock::time_point m_last_cache_clear_time; + /// Pointer to a function built by compiling a fragment of PPU code + typedef u64(*CompiledCodeFragment)(PPUThread * ppu_state, PPUInterpreter * interpreter); - /// The revision of the recompiler to which this thread is synced - u32 m_recompiler_revision; + struct PPUState; - /// Address to executable map. Key is address. - std::unordered_map m_address_to_executable; + /// PPU compiler that uses LLVM for code generation and optimization + class Compiler : protected PPUOpcodes, protected PPCDecoder { + public: + struct Stats { + /// Time spent building the LLVM IR + std::chrono::nanoseconds ir_build_time; - /// Sections that have not been compiled yet. Key is starting address of the section. 
- std::unordered_map m_uncompiled; + /// Time spent optimizing + std::chrono::nanoseconds optimization_time; - /// Number of instances of this class - static u32 s_num_instances; + /// Time spent translating LLVM IR to machine code + std::chrono::nanoseconds translation_time; - /// Mutex used prevent multiple instances of the recompiler from being created - static std::mutex s_recompiler_mutex; + /// Total time + std::chrono::nanoseconds total_time; - /// PPU to LLVM recompiler - static PPULLVMRecompiler * s_recompiler; -}; + /// Contains the number of times interpreter fallback was used + std::map interpreter_fallback_stats; + }; + + Compiler(); + + Compiler(const Compiler & other) = delete; + Compiler(Compiler && other) = delete; + + virtual ~Compiler(); + + Compiler & operator = (const Compiler & other) = delete; + Compiler & operator = (Compiler && other) = delete; + + /// Compile a code fragment + CompiledCodeFragment Compile(const std::string & name, const CodeFragment & code_fragment); + + /// Free a compiled code fragment + void FreeCompiledCodeFragment(CompiledCodeFragment compiled_code_fragment); + + /// Retrieve compiler stats + Stats GetStats(); + + /// Execute all tests + void RunAllTests(PPUThread * ppu_state, PPUInterpreter * interpreter); + + protected: + void Decode(const u32 code) override; + + void NULL_OP() override; + void NOP() override; + + void TDI(u32 to, u32 ra, s32 simm16) override; + void TWI(u32 to, u32 ra, s32 simm16) override; + + void MFVSCR(u32 vd) override; + void MTVSCR(u32 vb) override; + void VADDCUW(u32 vd, u32 va, u32 vb) override; + void VADDFP(u32 vd, u32 va, u32 vb) override; + void VADDSBS(u32 vd, u32 va, u32 vb) override; + void VADDSHS(u32 vd, u32 va, u32 vb) override; + void VADDSWS(u32 vd, u32 va, u32 vb) override; + void VADDUBM(u32 vd, u32 va, u32 vb) override; + void VADDUBS(u32 vd, u32 va, u32 vb) override; + void VADDUHM(u32 vd, u32 va, u32 vb) override; + void VADDUHS(u32 vd, u32 va, u32 vb) override; + void 
VADDUWM(u32 vd, u32 va, u32 vb) override; + void VADDUWS(u32 vd, u32 va, u32 vb) override; + void VAND(u32 vd, u32 va, u32 vb) override; + void VANDC(u32 vd, u32 va, u32 vb) override; + void VAVGSB(u32 vd, u32 va, u32 vb) override; + void VAVGSH(u32 vd, u32 va, u32 vb) override; + void VAVGSW(u32 vd, u32 va, u32 vb) override; + void VAVGUB(u32 vd, u32 va, u32 vb) override; + void VAVGUH(u32 vd, u32 va, u32 vb) override; + void VAVGUW(u32 vd, u32 va, u32 vb) override; + void VCFSX(u32 vd, u32 uimm5, u32 vb) override; + void VCFUX(u32 vd, u32 uimm5, u32 vb) override; + void VCMPBFP(u32 vd, u32 va, u32 vb) override; + void VCMPBFP_(u32 vd, u32 va, u32 vb) override; + void VCMPEQFP(u32 vd, u32 va, u32 vb) override; + void VCMPEQFP_(u32 vd, u32 va, u32 vb) override; + void VCMPEQUB(u32 vd, u32 va, u32 vb) override; + void VCMPEQUB_(u32 vd, u32 va, u32 vb) override; + void VCMPEQUH(u32 vd, u32 va, u32 vb) override; + void VCMPEQUH_(u32 vd, u32 va, u32 vb) override; + void VCMPEQUW(u32 vd, u32 va, u32 vb) override; + void VCMPEQUW_(u32 vd, u32 va, u32 vb) override; + void VCMPGEFP(u32 vd, u32 va, u32 vb) override; + void VCMPGEFP_(u32 vd, u32 va, u32 vb) override; + void VCMPGTFP(u32 vd, u32 va, u32 vb) override; + void VCMPGTFP_(u32 vd, u32 va, u32 vb) override; + void VCMPGTSB(u32 vd, u32 va, u32 vb) override; + void VCMPGTSB_(u32 vd, u32 va, u32 vb) override; + void VCMPGTSH(u32 vd, u32 va, u32 vb) override; + void VCMPGTSH_(u32 vd, u32 va, u32 vb) override; + void VCMPGTSW(u32 vd, u32 va, u32 vb) override; + void VCMPGTSW_(u32 vd, u32 va, u32 vb) override; + void VCMPGTUB(u32 vd, u32 va, u32 vb) override; + void VCMPGTUB_(u32 vd, u32 va, u32 vb) override; + void VCMPGTUH(u32 vd, u32 va, u32 vb) override; + void VCMPGTUH_(u32 vd, u32 va, u32 vb) override; + void VCMPGTUW(u32 vd, u32 va, u32 vb) override; + void VCMPGTUW_(u32 vd, u32 va, u32 vb) override; + void VCTSXS(u32 vd, u32 uimm5, u32 vb) override; + void VCTUXS(u32 vd, u32 uimm5, u32 vb) override; + void 
VEXPTEFP(u32 vd, u32 vb) override; + void VLOGEFP(u32 vd, u32 vb) override; + void VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) override; + void VMAXFP(u32 vd, u32 va, u32 vb) override; + void VMAXSB(u32 vd, u32 va, u32 vb) override; + void VMAXSH(u32 vd, u32 va, u32 vb) override; + void VMAXSW(u32 vd, u32 va, u32 vb) override; + void VMAXUB(u32 vd, u32 va, u32 vb) override; + void VMAXUH(u32 vd, u32 va, u32 vb) override; + void VMAXUW(u32 vd, u32 va, u32 vb) override; + void VMHADDSHS(u32 vd, u32 va, u32 vb, u32 vc) override; + void VMHRADDSHS(u32 vd, u32 va, u32 vb, u32 vc) override; + void VMINFP(u32 vd, u32 va, u32 vb) override; + void VMINSB(u32 vd, u32 va, u32 vb) override; + void VMINSH(u32 vd, u32 va, u32 vb) override; + void VMINSW(u32 vd, u32 va, u32 vb) override; + void VMINUB(u32 vd, u32 va, u32 vb) override; + void VMINUH(u32 vd, u32 va, u32 vb) override; + void VMINUW(u32 vd, u32 va, u32 vb) override; + void VMLADDUHM(u32 vd, u32 va, u32 vb, u32 vc) override; + void VMRGHB(u32 vd, u32 va, u32 vb) override; + void VMRGHH(u32 vd, u32 va, u32 vb) override; + void VMRGHW(u32 vd, u32 va, u32 vb) override; + void VMRGLB(u32 vd, u32 va, u32 vb) override; + void VMRGLH(u32 vd, u32 va, u32 vb) override; + void VMRGLW(u32 vd, u32 va, u32 vb) override; + void VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) override; + void VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) override; + void VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) override; + void VMSUMUBM(u32 vd, u32 va, u32 vb, u32 vc) override; + void VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) override; + void VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) override; + void VMULESB(u32 vd, u32 va, u32 vb) override; + void VMULESH(u32 vd, u32 va, u32 vb) override; + void VMULEUB(u32 vd, u32 va, u32 vb) override; + void VMULEUH(u32 vd, u32 va, u32 vb) override; + void VMULOSB(u32 vd, u32 va, u32 vb) override; + void VMULOSH(u32 vd, u32 va, u32 vb) override; + void VMULOUB(u32 vd, u32 va, u32 vb) override; + void VMULOUH(u32 vd, u32 va, u32 
vb) override; + void VNMSUBFP(u32 vd, u32 va, u32 vc, u32 vb) override; + void VNOR(u32 vd, u32 va, u32 vb) override; + void VOR(u32 vd, u32 va, u32 vb) override; + void VPERM(u32 vd, u32 va, u32 vb, u32 vc) override; + void VPKPX(u32 vd, u32 va, u32 vb) override; + void VPKSHSS(u32 vd, u32 va, u32 vb) override; + void VPKSHUS(u32 vd, u32 va, u32 vb) override; + void VPKSWSS(u32 vd, u32 va, u32 vb) override; + void VPKSWUS(u32 vd, u32 va, u32 vb) override; + void VPKUHUM(u32 vd, u32 va, u32 vb) override; + void VPKUHUS(u32 vd, u32 va, u32 vb) override; + void VPKUWUM(u32 vd, u32 va, u32 vb) override; + void VPKUWUS(u32 vd, u32 va, u32 vb) override; + void VREFP(u32 vd, u32 vb) override; + void VRFIM(u32 vd, u32 vb) override; + void VRFIN(u32 vd, u32 vb) override; + void VRFIP(u32 vd, u32 vb) override; + void VRFIZ(u32 vd, u32 vb) override; + void VRLB(u32 vd, u32 va, u32 vb) override; + void VRLH(u32 vd, u32 va, u32 vb) override; + void VRLW(u32 vd, u32 va, u32 vb) override; + void VRSQRTEFP(u32 vd, u32 vb) override; + void VSEL(u32 vd, u32 va, u32 vb, u32 vc) override; + void VSL(u32 vd, u32 va, u32 vb) override; + void VSLB(u32 vd, u32 va, u32 vb) override; + void VSLDOI(u32 vd, u32 va, u32 vb, u32 sh) override; + void VSLH(u32 vd, u32 va, u32 vb) override; + void VSLO(u32 vd, u32 va, u32 vb) override; + void VSLW(u32 vd, u32 va, u32 vb) override; + void VSPLTB(u32 vd, u32 uimm5, u32 vb) override; + void VSPLTH(u32 vd, u32 uimm5, u32 vb) override; + void VSPLTISB(u32 vd, s32 simm5) override; + void VSPLTISH(u32 vd, s32 simm5) override; + void VSPLTISW(u32 vd, s32 simm5) override; + void VSPLTW(u32 vd, u32 uimm5, u32 vb) override; + void VSR(u32 vd, u32 va, u32 vb) override; + void VSRAB(u32 vd, u32 va, u32 vb) override; + void VSRAH(u32 vd, u32 va, u32 vb) override; + void VSRAW(u32 vd, u32 va, u32 vb) override; + void VSRB(u32 vd, u32 va, u32 vb) override; + void VSRH(u32 vd, u32 va, u32 vb) override; + void VSRO(u32 vd, u32 va, u32 vb) override; + void VSRW(u32 
vd, u32 va, u32 vb) override; + void VSUBCUW(u32 vd, u32 va, u32 vb) override; + void VSUBFP(u32 vd, u32 va, u32 vb) override; + void VSUBSBS(u32 vd, u32 va, u32 vb) override; + void VSUBSHS(u32 vd, u32 va, u32 vb) override; + void VSUBSWS(u32 vd, u32 va, u32 vb) override; + void VSUBUBM(u32 vd, u32 va, u32 vb) override; + void VSUBUBS(u32 vd, u32 va, u32 vb) override; + void VSUBUHM(u32 vd, u32 va, u32 vb) override; + void VSUBUHS(u32 vd, u32 va, u32 vb) override; + void VSUBUWM(u32 vd, u32 va, u32 vb) override; + void VSUBUWS(u32 vd, u32 va, u32 vb) override; + void VSUMSWS(u32 vd, u32 va, u32 vb) override; + void VSUM2SWS(u32 vd, u32 va, u32 vb) override; + void VSUM4SBS(u32 vd, u32 va, u32 vb) override; + void VSUM4SHS(u32 vd, u32 va, u32 vb) override; + void VSUM4UBS(u32 vd, u32 va, u32 vb) override; + void VUPKHPX(u32 vd, u32 vb) override; + void VUPKHSB(u32 vd, u32 vb) override; + void VUPKHSH(u32 vd, u32 vb) override; + void VUPKLPX(u32 vd, u32 vb) override; + void VUPKLSB(u32 vd, u32 vb) override; + void VUPKLSH(u32 vd, u32 vb) override; + void VXOR(u32 vd, u32 va, u32 vb) override; + void MULLI(u32 rd, u32 ra, s32 simm16) override; + void SUBFIC(u32 rd, u32 ra, s32 simm16) override; + void CMPLI(u32 bf, u32 l, u32 ra, u32 uimm16) override; + void CMPI(u32 bf, u32 l, u32 ra, s32 simm16) override; + void ADDIC(u32 rd, u32 ra, s32 simm16) override; + void ADDIC_(u32 rd, u32 ra, s32 simm16) override; + void ADDI(u32 rd, u32 ra, s32 simm16) override; + void ADDIS(u32 rd, u32 ra, s32 simm16) override; + void BC(u32 bo, u32 bi, s32 bd, u32 aa, u32 lk) override; + void SC(u32 sc_code) override; + void B(s32 ll, u32 aa, u32 lk) override; + void MCRF(u32 crfd, u32 crfs) override; + void BCLR(u32 bo, u32 bi, u32 bh, u32 lk) override; + void CRNOR(u32 bt, u32 ba, u32 bb) override; + void CRANDC(u32 bt, u32 ba, u32 bb) override; + void ISYNC() override; + void CRXOR(u32 bt, u32 ba, u32 bb) override; + void CRNAND(u32 bt, u32 ba, u32 bb) override; + void CRAND(u32 bt, 
u32 ba, u32 bb) override; + void CREQV(u32 bt, u32 ba, u32 bb) override; + void CRORC(u32 bt, u32 ba, u32 bb) override; + void CROR(u32 bt, u32 ba, u32 bb) override; + void BCCTR(u32 bo, u32 bi, u32 bh, u32 lk) override; + void RLWIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) override; + void RLWINM(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) override; + void RLWNM(u32 ra, u32 rs, u32 rb, u32 MB, u32 ME, bool rc) override; + void ORI(u32 rs, u32 ra, u32 uimm16) override; + void ORIS(u32 rs, u32 ra, u32 uimm16) override; + void XORI(u32 ra, u32 rs, u32 uimm16) override; + void XORIS(u32 ra, u32 rs, u32 uimm16) override; + void ANDI_(u32 ra, u32 rs, u32 uimm16) override; + void ANDIS_(u32 ra, u32 rs, u32 uimm16) override; + void RLDICL(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) override; + void RLDICR(u32 ra, u32 rs, u32 sh, u32 me, bool rc) override; + void RLDIC(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) override; + void RLDIMI(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) override; + void RLDC_LR(u32 ra, u32 rs, u32 rb, u32 m_eb, bool is_r, bool rc) override; + void CMP(u32 crfd, u32 l, u32 ra, u32 rb) override; + void TW(u32 to, u32 ra, u32 rb) override; + void LVSL(u32 vd, u32 ra, u32 rb) override; + void LVEBX(u32 vd, u32 ra, u32 rb) override; + void SUBFC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; + void MULHDU(u32 rd, u32 ra, u32 rb, bool rc) override; + void ADDC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; + void MULHWU(u32 rd, u32 ra, u32 rb, bool rc) override; + void MFOCRF(u32 a, u32 rd, u32 crm) override; + void LWARX(u32 rd, u32 ra, u32 rb) override; + void LDX(u32 ra, u32 rs, u32 rb) override; + void LWZX(u32 rd, u32 ra, u32 rb) override; + void SLW(u32 ra, u32 rs, u32 rb, bool rc) override; + void CNTLZW(u32 ra, u32 rs, bool rc) override; + void SLD(u32 ra, u32 rs, u32 rb, bool rc) override; + void AND(u32 ra, u32 rs, u32 rb, bool rc) override; + void CMPL(u32 bf, u32 l, u32 ra, u32 rb) override; + void LVSR(u32 vd, u32 ra, u32 rb) 
override; + void LVEHX(u32 vd, u32 ra, u32 rb) override; + void SUBF(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; + void LDUX(u32 rd, u32 ra, u32 rb) override; + void DCBST(u32 ra, u32 rb) override; + void LWZUX(u32 rd, u32 ra, u32 rb) override; + void CNTLZD(u32 ra, u32 rs, bool rc) override; + void ANDC(u32 ra, u32 rs, u32 rb, bool rc) override; + void TD(u32 to, u32 ra, u32 rb) override; + void LVEWX(u32 vd, u32 ra, u32 rb) override; + void MULHD(u32 rd, u32 ra, u32 rb, bool rc) override; + void MULHW(u32 rd, u32 ra, u32 rb, bool rc) override; + void LDARX(u32 rd, u32 ra, u32 rb) override; + void DCBF(u32 ra, u32 rb) override; + void LBZX(u32 rd, u32 ra, u32 rb) override; + void LVX(u32 vd, u32 ra, u32 rb) override; + void NEG(u32 rd, u32 ra, u32 oe, bool rc) override; + void LBZUX(u32 rd, u32 ra, u32 rb) override; + void NOR(u32 ra, u32 rs, u32 rb, bool rc) override; + void STVEBX(u32 vs, u32 ra, u32 rb) override; + void SUBFE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; + void ADDE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; + void MTOCRF(u32 l, u32 crm, u32 rs) override; + void STDX(u32 rs, u32 ra, u32 rb) override; + void STWCX_(u32 rs, u32 ra, u32 rb) override; + void STWX(u32 rs, u32 ra, u32 rb) override; + void STVEHX(u32 vs, u32 ra, u32 rb) override; + void STDUX(u32 rs, u32 ra, u32 rb) override; + void STWUX(u32 rs, u32 ra, u32 rb) override; + void STVEWX(u32 vs, u32 ra, u32 rb) override; + void SUBFZE(u32 rd, u32 ra, u32 oe, bool rc) override; + void ADDZE(u32 rd, u32 ra, u32 oe, bool rc) override; + void STDCX_(u32 rs, u32 ra, u32 rb) override; + void STBX(u32 rs, u32 ra, u32 rb) override; + void STVX(u32 vs, u32 ra, u32 rb) override; + void MULLD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; + void SUBFME(u32 rd, u32 ra, u32 oe, bool rc) override; + void ADDME(u32 rd, u32 ra, u32 oe, bool rc) override; + void MULLW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; + void DCBTST(u32 ra, u32 rb, u32 th) override; + void 
STBUX(u32 rs, u32 ra, u32 rb) override; + void ADD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; + void DCBT(u32 ra, u32 rb, u32 th) override; + void LHZX(u32 rd, u32 ra, u32 rb) override; + void EQV(u32 ra, u32 rs, u32 rb, bool rc) override; + void ECIWX(u32 rd, u32 ra, u32 rb) override; + void LHZUX(u32 rd, u32 ra, u32 rb) override; + void XOR(u32 rs, u32 ra, u32 rb, bool rc) override; + void MFSPR(u32 rd, u32 spr) override; + void LWAX(u32 rd, u32 ra, u32 rb) override; + void DST(u32 ra, u32 rb, u32 strm, u32 t) override; + void LHAX(u32 rd, u32 ra, u32 rb) override; + void LVXL(u32 vd, u32 ra, u32 rb) override; + void MFTB(u32 rd, u32 spr) override; + void LWAUX(u32 rd, u32 ra, u32 rb) override; + void DSTST(u32 ra, u32 rb, u32 strm, u32 t) override; + void LHAUX(u32 rd, u32 ra, u32 rb) override; + void STHX(u32 rs, u32 ra, u32 rb) override; + void ORC(u32 rs, u32 ra, u32 rb, bool rc) override; + void ECOWX(u32 rs, u32 ra, u32 rb) override; + void STHUX(u32 rs, u32 ra, u32 rb) override; + void OR(u32 ra, u32 rs, u32 rb, bool rc) override; + void DIVDU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; + void DIVWU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; + void MTSPR(u32 spr, u32 rs) override; + //DCBI + void NAND(u32 ra, u32 rs, u32 rb, bool rc) override; + void STVXL(u32 vs, u32 ra, u32 rb) override; + void DIVD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; + void DIVW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override; + void LVLX(u32 vd, u32 ra, u32 rb) override; + void LDBRX(u32 rd, u32 ra, u32 rb) override; + void LSWX(u32 rd, u32 ra, u32 rb) override; + void LWBRX(u32 rd, u32 ra, u32 rb) override; + void LFSX(u32 frd, u32 ra, u32 rb) override; + void SRW(u32 ra, u32 rs, u32 rb, bool rc) override; + void SRD(u32 ra, u32 rs, u32 rb, bool rc) override; + void LVRX(u32 vd, u32 ra, u32 rb) override; + void LSWI(u32 rd, u32 ra, u32 nb) override; + void LFSUX(u32 frd, u32 ra, u32 rb) override; + void SYNC(u32 l) override; + void LFDX(u32 frd, 
u32 ra, u32 rb) override; + void LFDUX(u32 frd, u32 ra, u32 rb) override; + void STVLX(u32 vs, u32 ra, u32 rb) override; + void STSWX(u32 rs, u32 ra, u32 rb) override; + void STWBRX(u32 rs, u32 ra, u32 rb) override; + void STFSX(u32 frs, u32 ra, u32 rb) override; + void STVRX(u32 vs, u32 ra, u32 rb) override; + void STFSUX(u32 frs, u32 ra, u32 rb) override; + void STSWI(u32 rd, u32 ra, u32 nb) override; + void STFDX(u32 frs, u32 ra, u32 rb) override; + void STFDUX(u32 frs, u32 ra, u32 rb) override; + void LVLXL(u32 vd, u32 ra, u32 rb) override; + void LHBRX(u32 rd, u32 ra, u32 rb) override; + void SRAW(u32 ra, u32 rs, u32 rb, bool rc) override; + void SRAD(u32 ra, u32 rs, u32 rb, bool rc) override; + void LVRXL(u32 vd, u32 ra, u32 rb) override; + void DSS(u32 strm, u32 a) override; + void SRAWI(u32 ra, u32 rs, u32 sh, bool rc) override; + void SRADI1(u32 ra, u32 rs, u32 sh, bool rc) override; + void SRADI2(u32 ra, u32 rs, u32 sh, bool rc) override; + void EIEIO() override; + void STVLXL(u32 vs, u32 ra, u32 rb) override; + void STHBRX(u32 rs, u32 ra, u32 rb) override; + void EXTSH(u32 ra, u32 rs, bool rc) override; + void STVRXL(u32 sd, u32 ra, u32 rb) override; + void EXTSB(u32 ra, u32 rs, bool rc) override; + void STFIWX(u32 frs, u32 ra, u32 rb) override; + void EXTSW(u32 ra, u32 rs, bool rc) override; + void ICBI(u32 ra, u32 rb) override; + void DCBZ(u32 ra, u32 rb) override; + void LWZ(u32 rd, u32 ra, s32 d) override; + void LWZU(u32 rd, u32 ra, s32 d) override; + void LBZ(u32 rd, u32 ra, s32 d) override; + void LBZU(u32 rd, u32 ra, s32 d) override; + void STW(u32 rs, u32 ra, s32 d) override; + void STWU(u32 rs, u32 ra, s32 d) override; + void STB(u32 rs, u32 ra, s32 d) override; + void STBU(u32 rs, u32 ra, s32 d) override; + void LHZ(u32 rd, u32 ra, s32 d) override; + void LHZU(u32 rd, u32 ra, s32 d) override; + void LHA(u32 rs, u32 ra, s32 d) override; + void LHAU(u32 rs, u32 ra, s32 d) override; + void STH(u32 rs, u32 ra, s32 d) override; + void STHU(u32 rs, 
u32 ra, s32 d) override; + void LMW(u32 rd, u32 ra, s32 d) override; + void STMW(u32 rs, u32 ra, s32 d) override; + void LFS(u32 frd, u32 ra, s32 d) override; + void LFSU(u32 frd, u32 ra, s32 d) override; + void LFD(u32 frd, u32 ra, s32 d) override; + void LFDU(u32 frd, u32 ra, s32 d) override; + void STFS(u32 frs, u32 ra, s32 d) override; + void STFSU(u32 frs, u32 ra, s32 d) override; + void STFD(u32 frs, u32 ra, s32 d) override; + void STFDU(u32 frs, u32 ra, s32 d) override; + void LD(u32 rd, u32 ra, s32 ds) override; + void LDU(u32 rd, u32 ra, s32 ds) override; + void LWA(u32 rd, u32 ra, s32 ds) override; + void FDIVS(u32 frd, u32 fra, u32 frb, bool rc) override; + void FSUBS(u32 frd, u32 fra, u32 frb, bool rc) override; + void FADDS(u32 frd, u32 fra, u32 frb, bool rc) override; + void FSQRTS(u32 frd, u32 frb, bool rc) override; + void FRES(u32 frd, u32 frb, bool rc) override; + void FMULS(u32 frd, u32 fra, u32 frc, bool rc) override; + void FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override; + void FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override; + void FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override; + void FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override; + void STD(u32 rs, u32 ra, s32 ds) override; + void STDU(u32 rs, u32 ra, s32 ds) override; + void MTFSB1(u32 bt, bool rc) override; + void MCRFS(u32 bf, u32 bfa) override; + void MTFSB0(u32 bt, bool rc) override; + void MTFSFI(u32 crfd, u32 i, bool rc) override; + void MFFS(u32 frd, bool rc) override; + void MTFSF(u32 flm, u32 frb, bool rc) override; + + void FCMPU(u32 bf, u32 fra, u32 frb) override; + void FRSP(u32 frd, u32 frb, bool rc) override; + void FCTIW(u32 frd, u32 frb, bool rc) override; + void FCTIWZ(u32 frd, u32 frb, bool rc) override; + void FDIV(u32 frd, u32 fra, u32 frb, bool rc) override; + void FSUB(u32 frd, u32 fra, u32 frb, bool rc) override; + void FADD(u32 frd, u32 fra, u32 frb, bool rc) override; + void FSQRT(u32 frd, u32 frb, bool rc) 
override; + void FSEL(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override; + void FMUL(u32 frd, u32 fra, u32 frc, bool rc) override; + void FRSQRTE(u32 frd, u32 frb, bool rc) override; + void FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override; + void FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override; + void FNMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override; + void FNMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override; + void FCMPO(u32 crfd, u32 fra, u32 frb) override; + void FNEG(u32 frd, u32 frb, bool rc) override; + void FMR(u32 frd, u32 frb, bool rc) override; + void FNABS(u32 frd, u32 frb, bool rc) override; + void FABS(u32 frd, u32 frb, bool rc) override; + void FCTID(u32 frd, u32 frb, bool rc) override; + void FCTIDZ(u32 frd, u32 frb, bool rc) override; + void FCFID(u32 frd, u32 frb, bool rc) override; + + void UNK(const u32 code, const u32 opcode, const u32 gcode) override; + + private: + /// Map from compiled code fragment to the LLVM function for the code fragment + std::map m_compiled; + + /// LLVM context + llvm::LLVMContext * m_llvm_context; + + /// LLVM IR builder + llvm::IRBuilder<> * m_ir_builder; + + /// Module to which all generated code is output to + llvm::Module * m_module; + + /// JIT execution engine + llvm::ExecutionEngine * m_execution_engine; + + /// Function pass manager + llvm::FunctionPassManager * m_fpm; + + /// A flag used to detect branch instructions. + /// This is set to false at the start of compilation of a block. + /// When a branch instruction is encountered, this is set to true by the decode function. 
+ bool m_hit_branch_instruction; + + /// The function being compiled + llvm::Function * m_current_function; + + /// The list of next blocks for the current block + const std::vector * m_current_block_next_blocks; + + /// Address of the current instruction + u32 m_current_instruction_address; + + /// Compiler stats + Stats m_stats; + + /// Get the name of the basic block for the specified address + std::string GetBasicBlockNameFromAddress(u32 address); + + /// Get the basic block in the given function for the specified address. + llvm::BasicBlock * GetBasicBlockFromAddress(u32 address, llvm::Function * function, bool create_if_not_exist = false); + + /// Get PPU state pointer argument + llvm::Value * GetPPUStateArg(); + + /// Get interpreter pointer argument + llvm::Value * GetInterpreterArg(); + + /// Get tracer pointer argument + llvm::Value * GetTracerArg(); + + /// Get a bit + llvm::Value * GetBit(llvm::Value * val, u32 n); + + /// Clear a bit + llvm::Value * ClrBit(llvm::Value * val, u32 n); + + /// Set a bit + llvm::Value * SetBit(llvm::Value * val, u32 n, llvm::Value * bit, bool doClear = true); + + /// Get a nibble + llvm::Value * GetNibble(llvm::Value * val, u32 n); + + /// Clear a nibble + llvm::Value * ClrNibble(llvm::Value * val, u32 n); + + /// Set a nibble + llvm::Value * SetNibble(llvm::Value * val, u32 n, llvm::Value * nibble, bool doClear = true); + + /// Set a nibble + llvm::Value * SetNibble(llvm::Value * val, u32 n, llvm::Value * b0, llvm::Value * b1, llvm::Value * b2, llvm::Value * b3, bool doClear = true); + + /// Load PC + llvm::Value * GetPc(); + + /// Set PC + void SetPc(llvm::Value * val_ix); + + /// Load GPR + llvm::Value * GetGpr(u32 r, u32 num_bits = 64); + + /// Set GPR + void SetGpr(u32 r, llvm::Value * val_x64); + + /// Load CR + llvm::Value * GetCr(); + + /// Load CR and get field CRn + llvm::Value * GetCrField(u32 n); + + /// Set CR + void SetCr(llvm::Value * val_x32); + + /// Set CR field + void SetCrField(u32 n, llvm::Value * field); + + /// Set CR 
field + void SetCrField(u32 n, llvm::Value * b0, llvm::Value * b1, llvm::Value * b2, llvm::Value * b3); + + /// Set CR field based on signed comparison + void SetCrFieldSignedCmp(u32 n, llvm::Value * a, llvm::Value * b); + + /// Set CR field based on unsigned comparison + void SetCrFieldUnsignedCmp(u32 n, llvm::Value * a, llvm::Value * b); + + /// Set CR6 based on the result of the vector compare instruction + void SetCr6AfterVectorCompare(u32 vr); + + /// Get LR + llvm::Value * GetLr(); + + /// Set LR + void SetLr(llvm::Value * val_x64); + + /// Get CTR + llvm::Value * GetCtr(); + + /// Set CTR + void SetCtr(llvm::Value * val_x64); + + /// Load XER and convert it to an i64 + llvm::Value * GetXer(); + + /// Load XER and return the CA bit + llvm::Value * GetXerCa(); + + /// Load XER and return the SO bit + llvm::Value * GetXerSo(); + + /// Set XER + void SetXer(llvm::Value * val_x64); + + /// Set the CA bit of XER + void SetXerCa(llvm::Value * ca); + + /// Set the SO bit of XER + void SetXerSo(llvm::Value * so); + + /// Get USPRG0 + llvm::Value * GetUsprg0(); + + /// Set USPRG0 + void SetUsprg0(llvm::Value * val_x64); + + /// Get FPR + llvm::Value * GetFpr(u32 r, u32 bits = 64, bool as_int = false); + + /// Set FPR + void SetFpr(u32 r, llvm::Value * val); + + /// Load VSCR + llvm::Value * GetVscr(); + + /// Set VSCR + void SetVscr(llvm::Value * val_x32); + + /// Load VR + llvm::Value * GetVr(u32 vr); + + /// Load VR and convert it to an integer vector + llvm::Value * GetVrAsIntVec(u32 vr, u32 vec_elt_num_bits); + + /// Load VR and convert it to a float vector with 4 elements + llvm::Value * GetVrAsFloatVec(u32 vr); + + /// Load VR and convert it to a double vector with 2 elements + llvm::Value * GetVrAsDoubleVec(u32 vr); + + /// Set VR to the specified value + void SetVr(u32 vr, llvm::Value * val_x128); + + /// Check condition for branch instructions + llvm::Value * CheckBranchCondition(u32 bo, u32 bi); + + /// Create IR for a branch instruction + void 
CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i64, bool lk, bool target_is_lr = false); + + /// Read from memory + llvm::Value * ReadMemory(llvm::Value * addr_i64, u32 bits, u32 alignment = 0, bool bswap = true, bool could_be_mmio = true); + + /// Write to memory + void WriteMemory(llvm::Value * addr_i64, llvm::Value * val_ix, u32 alignment = 0, bool bswap = true, bool could_be_mmio = true); + + /// Call an interpreter function + template + llvm::Value * InterpreterCall(const char * name, Func function, Args... args); + + /// Convert a C++ type to an LLVM type + template + llvm::Type * CppToLlvmType(); + + /// Call a function + template + llvm::Value * Call(const char * name, Func function, Args... args); + + /// Tests if the instruction is a branch instruction or not + bool IsBranchInstruction(u32 instruction); + + /// Test an instruction against the interpreter + template + void VerifyInstructionAgainstInterpreter(const char * name, PPULLVMRecompilerFn recomp_fn, PPUInterpreterFn interp_fn, PPUState & input_state, Args... 
args); + + /// Excute a test + void RunTest(const char * name, std::function test_case, std::function input, std::function check_result); + + /// A mask used in rotate instructions + static u64 s_rotate_mask[64][64]; + + /// A flag indicating whether s_rotate_mask has been initialised or not + static bool s_rotate_mask_inited; + + /// Initialse s_rotate_mask + static void InitRotateMask(); + }; + + /// Analyses execution traces and finds hot paths + class Profiler { + + }; + + class RecompilationEngine { + public: + virtual ~RecompilationEngine() = default; + + /// Get the compiled code fragment for the specified address + CompiledCodeFragment GetCompiledCodeFragment(u32 address); + + /// Release a compiled code fragment earlier obtained through GetCompiledCodeFragment + void ReleaseCompiledCodeFragment(CompiledCodeFragment compiled_code_fragment); + + /// Get the current revision + u32 GetCurrentRevision(); + + /// Get a pointer to the instance of this class + static std::shared_ptr GetInstance(); + + private: + RecompilationEngine() = default; + + RecompilationEngine(const RecompilationEngine & other) = delete; + RecompilationEngine(RecompilationEngine && other) = delete; + + RecompilationEngine & operator = (const RecompilationEngine & other) = delete; + RecompilationEngine & operator = (RecompilationEngine && other) = delete; + + /// Mutex used to prevent multiple creation + static std::mutex s_mutex; + + /// The instance + static std::shared_ptr s_the_instance; + }; + + /// Finds interesting execution sequences + class Tracer { + public: + Tracer(); + + Tracer(const Tracer & other) = delete; + Tracer(Tracer && other) = delete; + + virtual ~Tracer(); + + Tracer & operator = (const Tracer & other) = delete; + Tracer & operator = (Tracer && other) = delete; + + /// Notify the tracer that a branch was encountered + void Trace(BranchType branch_type, u32 address); + + /// Notify the tracer that the execution sequence is being terminated. 
+ void Terminate(); + + private: + /// Current execution trace + std::vector m_trace; + + /// Call stack + std::vector m_stack; + }; + + /// PPU execution engine + class ExecutionEngine : public CPUDecoder { + public: + ExecutionEngine(PPUThread & ppu); + ExecutionEngine() = delete; + + ExecutionEngine(const ExecutionEngine & other) = delete; + ExecutionEngine(ExecutionEngine && other) = delete; + + virtual ~ExecutionEngine(); + + ExecutionEngine & operator = (const ExecutionEngine & other) = delete; + ExecutionEngine & operator = (ExecutionEngine && other) = delete; + + u8 DecodeMemory(const u32 address) override; + + private: + /// PPU processor context + PPUThread & m_ppu; + + /// PPU Interpreter + PPUInterpreter * m_interpreter; + + /// PPU instruction Decoder + PPUDecoder m_decoder; + + /// Execution tracer + Tracer m_tracer; + + /// Set to true if the last executed instruction was a branch + BranchType m_last_branch_type; + + /// The time at which the m_address_to_compiled_code_fragment cache was last cleared + std::chrono::high_resolution_clock::time_point m_last_cache_clear_time; + + /// The revision of the recompiler to which this thread is synced + u32 m_recompiler_revision; + + /// Address to compiled code fragmnet lookup. Key is address. Data is the pair (compiled code fragment, times hit). 
+ std::unordered_map> m_address_to_compiled_code_fragment; + + /// Recompilation engine + std::shared_ptr m_recompilation_engine; + }; + + // Get the branch type from a branch instruction + BranchType GetBranchTypeFromInstruction(u32 instruction); +} #endif // PPU_LLVM_RECOMPILER_H diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index e164e1c531..8f1cc2407c 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -630,7 +630,7 @@ Emu\Cell - Source Files + Emu\Cell From 160b58cf617d6f98038875cf03904ee201bc31c1 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sat, 25 Oct 2014 06:56:57 +0530 Subject: [PATCH 02/27] Fixed compilation issues --- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 11 ++++------- rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp | 13 +++++++------ rpcs3/Emu/Cell/PPUThread.cpp | 2 +- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 4dda28cb55..9f37308fc7 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -4748,7 +4748,7 @@ CompiledCodeFragment RecompilationEngine::GetCompiledCodeFragment(u32 address) { return nullptr; } -void ReleaseCompiledCodeFragment(CompiledCodeFragment compiled_code_fragment) { +void RecompilationEngine::ReleaseCompiledCodeFragment(CompiledCodeFragment compiled_code_fragment) { } @@ -4778,7 +4778,6 @@ void Tracer::Trace(BranchType branch_type, u32 address) { ExecutionTrace * execution_trace = nullptr; BlockId block_id; int function; - int start; block_id.address = address; block_id.type = branch_type; @@ -4810,13 +4809,11 @@ void Tracer::Trace(BranchType branch_type, u32 address) { function = m_stack.back(); m_stack.pop_back(); - start = function; - execution_trace = new ExecutionTrace(); execution_trace->function_address = m_trace[function].address; execution_trace->type = ExecutionTrace::Linear; - 
execution_trace->blocks.insert(execution_trace->blocks.begin(), m_trace.begin() + start, m_trace.end()); - m_trace.erase(m_trace.begin() + start + 1, m_trace.end()); + execution_trace->blocks.insert(execution_trace->blocks.begin(), m_trace.begin() + function, m_trace.end()); + m_trace.erase(m_trace.begin() + function + 1, m_trace.end()); break; case None: break; @@ -4908,7 +4905,7 @@ u8 ppu_recompiler_llvm::ExecutionEngine::DecodeMemory(const u32 address) { return ret; } -BranchType GetBranchTypeFromInstruction(u32 instruction) { +BranchType ppu_recompiler_llvm::GetBranchTypeFromInstruction(u32 instruction) { auto type = BranchType::None; auto field1 = instruction >> 26; auto lk = instruction & 1; diff --git a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp index d601503e1b..2b637c4b4c 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp @@ -11,9 +11,10 @@ //#define PPU_LLVM_RECOMPILER_UNIT_TESTS 1 using namespace llvm; +using namespace ppu_recompiler_llvm; #define VERIFY_INSTRUCTION_AGAINST_INTERPRETER(fn, tc, input, ...) \ -VerifyInstructionAgainstInterpreter(fmt::Format("%s.%d", #fn, tc).c_str(), &PPULLVMRecompiler::fn, &PPUInterpreter::fn, input, __VA_ARGS__) +VerifyInstructionAgainstInterpreter(fmt::Format("%s.%d", #fn, tc).c_str(), &Compiler::fn, &PPUInterpreter::fn, input, __VA_ARGS__) #define VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(fn, s, n, ...) 
{ \ PPUState input; \ @@ -24,7 +25,7 @@ VerifyInstructionAgainstInterpreter(fmt::Format("%s.%d", #fn, tc).c_str(), &PPUL } /// Register state of a PPU -struct PPUState { + struct ppu_recompiler_llvm::PPUState { /// Floating point registers PPCdouble FPR[32]; @@ -201,8 +202,8 @@ static PPUThread * s_ppu_state = nullptr; static PPUInterpreter * s_interpreter = nullptr; #endif // PPU_LLVM_RECOMPILER_UNIT_TESTS -template -void PPULLVMRecompiler::VerifyInstructionAgainstInterpreter(const char * name, PPULLVMRecompilerFn recomp_fn, PPUInterpreterFn interp_fn, PPUState & input_state, Args... args) { +template +void Compiler::VerifyInstructionAgainstInterpreter(const char * name, CompilerFn recomp_fn, PPUInterpreterFn interp_fn, PPUState & input_state, Args... args) { #ifdef PPU_LLVM_RECOMPILER_UNIT_TESTS auto test_case = [&]() { (this->*recomp_fn)(args...); @@ -232,7 +233,7 @@ void PPULLVMRecompiler::VerifyInstructionAgainstInterpreter(const char * name, P #endif // PPU_LLVM_RECOMPILER_UNIT_TESTS } -void PPULLVMRecompiler::RunTest(const char * name, std::function test_case, std::function input, std::function check_result) { +void Compiler::RunTest(const char * name, std::function test_case, std::function input, std::function check_result) { #ifdef PPU_LLVM_RECOMPILER_UNIT_TESTS // Create the unit test function m_current_function = (Function *)m_module->getOrInsertFunction(name, m_ir_builder->getVoidTy(), @@ -311,7 +312,7 @@ void PPULLVMRecompiler::RunTest(const char * name, std::function test_ca #endif // PPU_LLVM_RECOMPILER_UNIT_TESTS } -void PPULLVMRecompiler::RunAllTests(PPUThread * ppu_state, PPUInterpreter * interpreter) { +void Compiler::RunAllTests(PPUThread * ppu_state, PPUInterpreter * interpreter) { #ifdef PPU_LLVM_RECOMPILER_UNIT_TESTS s_ppu_state = ppu_state; s_interpreter = interpreter; diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index f9d01a75a7..df7ce2740b 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ 
b/rpcs3/Emu/Cell/PPUThread.cpp @@ -112,7 +112,7 @@ void PPUThread::DoRun() case 2: if (!m_dec) { - m_dec = new PPULLVMEmulator(*this); + m_dec = new ppu_recompiler_llvm::ExecutionEngine(*this); } break; From 7846fa5829da514ba6bef6c042fdd39e2909e11f Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sat, 1 Nov 2014 01:22:39 +0530 Subject: [PATCH 03/27] Made changes to tracer so that it understand compiled blocks --- rpcs3/Emu/Cell/PPUInterpreter.h | 2 +- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 424 +++++++++++++++++++++------ rpcs3/Emu/Cell/PPULLVMRecompiler.h | 168 +++++++---- 3 files changed, 456 insertions(+), 138 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index da61d3f1b7..f476d8c870 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -61,7 +61,7 @@ namespace ppu_recompiler_llvm { class PPUInterpreter : public PPUOpcodes { - friend class ppu_recompiler_llvm::Compiler; + friend class ppu_recompiler_llvm::Compiler; private: PPUThread& CPU; diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 9f37308fc7..952eed981c 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -76,7 +76,7 @@ Compiler::~Compiler() { delete m_llvm_context; } -CompiledCodeFragment Compiler::Compile(const std::string & name, const CodeFragment & code_fragment) { +Executable Compiler::Compile(const std::string & name, const CodeFragment & code_fragment) { assert(!name.empty()); assert(!code_fragment.empty()); @@ -114,7 +114,7 @@ CompiledCodeFragment Compiler::Compile(const std::string & name, const CodeFragm // Add code to notify the tracer about this function and branch to the first instruction m_ir_builder->SetInsertPoint(GetBasicBlockFromAddress(0, m_current_function)); //Call("Tracer.Trace", &Tracer::Trace, *arg_i, - // m_ir_builder->getIntN(sizeof(Tracer::BranchType) * 8, code_fragment[0].first.type == FunctionStart ? 
Tracer::BranchType::CompiledFunctionCall : Tracer::BranchType::CompiledBlock), + // m_ir_builder->getInt32(code_fragment[0].first.type == Function ? FunctionCall : Block), // m_ir_builder->getInt32(code_fragment[0].first.address)); m_ir_builder->CreateBr(GetBasicBlockFromAddress(code_fragment[0].first.address, m_current_function)); @@ -123,9 +123,14 @@ CompiledCodeFragment Compiler::Compile(const std::string & name, const CodeFragm m_current_instruction_address = i->first.address; m_current_block_next_blocks = &(i->second); auto block = GetBasicBlockFromAddress(m_current_instruction_address, m_current_function); - m_hit_branch_instruction = false; m_ir_builder->SetInsertPoint(block); + if (i != code_fragment.begin() && i->first.type == BlockId::Type::FunctionCall) { + auto ordinal = RecompilationEngine::GetInstance()->GetOrdinal(i->first.address); + + } + + m_hit_branch_instruction = false; while (!m_hit_branch_instruction) { if (!block->getInstList().empty()) { break; @@ -143,7 +148,7 @@ CompiledCodeFragment Compiler::Compile(const std::string & name, const CodeFragm } } - // If the function has an unknown block then notify the tracer + // If the function has an unknown block then add code to notify the tracer auto unknown_bb = GetBasicBlockFromAddress(0xFFFFFFFF, m_current_function); if (!unknown_bb) { m_ir_builder->SetInsertPoint(unknown_bb); @@ -177,16 +182,17 @@ CompiledCodeFragment Compiler::Compile(const std::string & name, const CodeFragm auto compilation_end = std::chrono::high_resolution_clock::now(); m_stats.total_time += std::chrono::duration_cast(compilation_end - compilation_start); - m_compiled[(CompiledCodeFragment)mci.address()] = m_current_function; - return (CompiledCodeFragment)mci.address(); + //m_compiled[(CompiledCodeFragment)mci.address()] = m_current_function; + //return (CompiledCodeFragment)mci.address(); + return nullptr; } -void Compiler::FreeCompiledCodeFragment(CompiledCodeFragment compiled_code_fragment) { - auto i = 
m_compiled.find(compiled_code_fragment); - if (i != m_compiled.end()) { - m_execution_engine->freeMachineCodeForFunction(i->second); - i->second->eraseFromParent(); - } +void Compiler::FreeCompiledCodeFragment(Executable compiled_code_fragment) { + //auto i = m_compiled.find(compiled_code_fragment); + //if (i != m_compiled.end()) { + // m_execution_engine->freeMachineCodeForFunction(i->second); + // i->second->eraseFromParent(); + //} } Compiler::Stats Compiler::GetStats() { @@ -4741,31 +4747,201 @@ void Compiler::InitRotateMask() { } } -std::mutex RecompilationEngine::s_mutex; -std::shared_ptr RecompilationEngine::s_the_instance; +std::mutex RecompilationEngine::s_mutex; +RecompilationEngine * RecompilationEngine::s_the_instance; -CompiledCodeFragment RecompilationEngine::GetCompiledCodeFragment(u32 address) { +RecompilationEngine::BlockEntry::BlockEntry() + : num_hits(0) + , is_compiled(false) { +} + +RecompilationEngine::BlockEntry::~BlockEntry() { + for (auto i = execution_traces.begin(); i != execution_traces.end(); i++) { + delete i->second; + } +} + +RecompilationEngine::RecompilationEngine() + : ThreadBase("PPU Recompilation Engine") { + Start(); +} + +RecompilationEngine::~RecompilationEngine() { + Stop(); +} + +u32 RecompilationEngine::GetOrdinal(u32 address) { + return 0xFFFFFFFF; +} + +Executable * RecompilationEngine::GetExecutableLookup() const { return nullptr; } -void RecompilationEngine::ReleaseCompiledCodeFragment(CompiledCodeFragment compiled_code_fragment) { +void RecompilationEngine::NotifyTrace(ExecutionTrace * execution_trace) { + { + std::lock_guard lock(m_pending_execution_traces_lock); + m_pending_execution_traces.push_back(execution_trace); + } + Notify(); + // TODO: Increase the priority of the recompilation engine thread } -u32 RecompilationEngine::GetCurrentRevision() { - return 0; +void RecompilationEngine::Task() { + std::chrono::nanoseconds idling_time(0); + + auto start = std::chrono::high_resolution_clock::now(); + while 
(!TestDestroy() && !Emu.IsStopped()) { + // Wait a few ms for something to happen + auto idling_start = std::chrono::high_resolution_clock::now(); + WaitForAnySignal(250); + auto idling_end = std::chrono::high_resolution_clock::now(); + idling_time += std::chrono::duration_cast(idling_end - idling_start); + + u32 num_processed = 0; + while (!TestDestroy() && !Emu.IsStopped()) { + ExecutionTrace * execution_trace; + + { + std::lock_guard lock(m_pending_execution_traces_lock); + + auto i = m_pending_execution_traces.begin(); + if (i != m_pending_execution_traces.end()) { + execution_trace = *i; + m_pending_execution_traces.erase(i); + } else { + break; + } + } + + auto block_i = ProcessExecutionTrace(execution_trace); + if (block_i != m_block_table.end()) { + CompileBlock(block_i); + } + } + + // TODO: Reduce the priority of the recompilation engine thread + + if (num_processed == 0) { + // If we get here, it means the recompilation engine is idling. + // We should use this oppurtunity to optimize the code. 
+ } + } + + std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now(); + auto total_time = std::chrono::duration_cast(end - start); + auto compiler_stats = m_compiler.GetStats(); + + std::string error; + raw_fd_ostream log_file("PPULLVMRecompiler.log", error, sys::fs::F_Text); + log_file << "Total time = " << total_time.count() / 1000000 << "ms\n"; + log_file << " Time spent compiling = " << compiler_stats.total_time.count() / 1000000 << "ms\n"; + log_file << " Time spent building IR = " << compiler_stats.ir_build_time.count() / 1000000 << "ms\n"; + log_file << " Time spent optimizing = " << compiler_stats.optimization_time.count() / 1000000 << "ms\n"; + log_file << " Time spent translating = " << compiler_stats.translation_time.count() / 1000000 << "ms\n"; + log_file << " Time spent idling = " << idling_time.count() / 1000000 << "ms\n"; + log_file << " Time spent doing misc tasks = " << (total_time.count() - idling_time.count() - compiler_stats.total_time.count()) / 1000000 << "ms\n"; + log_file << "\nInterpreter fallback stats:\n"; + for (auto i = compiler_stats.interpreter_fallback_stats.begin(); i != compiler_stats.interpreter_fallback_stats.end(); i++) { + log_file << i->first << " = " << i->second << "\n"; + } + + //log_file << "\nDisassembly:\n"; + //auto disassembler = LLVMCreateDisasm(sys::getProcessTriple().c_str(), nullptr, 0, nullptr, nullptr); + //for (auto i = m_compiled.begin(); i != m_compiled.end(); i++) { + // log_file << fmt::Format("%s: Size = %u bytes, Number of instructions = %u\n", i->second.llvm_function->getName().str().c_str(), i->second.size, i->second.num_instructions); + + // uint8_t * fn_ptr = (uint8_t *)i->second.executable; + // for (size_t pc = 0; pc < i->second.size;) { + // char str[1024]; + + // auto size = LLVMDisasmInstruction(disassembler, fn_ptr + pc, i->second.size - pc, (uint64_t)(fn_ptr + pc), str, sizeof(str)); + // log_file << str << '\n'; + // pc += size; + // } + //} + + 
//LLVMDisasmDispose(disassembler); + + //log_file << "\nLLVM IR:\n" << *m_module; + + LOG_NOTICE(PPU, "PPU LLVM Recompilation thread exiting."); +} + +RecompilationEngine::BlockTable::iterator RecompilationEngine::ProcessExecutionTrace(ExecutionTrace * execution_trace) { + auto block_i = m_block_table.find(execution_trace->blocks[0].address); + if (block_i == m_block_table.end()) { + // New block + block_i = m_block_table.insert(m_block_table.end(), std::make_pair(execution_trace->blocks[0].address, BlockEntry())); + } + + block_i->second.num_hits++; + auto execution_trace_id = GetExecutionTraceId(execution_trace); + auto execution_trace_i = block_i->second.execution_traces.find(execution_trace_id); + if (execution_trace_i == block_i->second.execution_traces.end()) { + block_i->second.execution_traces.insert(std::make_pair(execution_trace_id, execution_trace)); + } + + if (!block_i->second.is_compiled && block_i->second.num_hits > 1000) { // TODO: Make threshold configurable + return block_i; + } + + return m_block_table.end(); +} + +void RecompilationEngine::CompileBlock(BlockTable::iterator block_i) { + auto code_fragment = BuildCodeFragmentFromBlock(block_i->second, false); +} + +CodeFragment RecompilationEngine::BuildCodeFragmentFromBlock(const BlockEntry & block_entry, bool force_inline) { + CodeFragment code_fragment; + //std::vector queue; + + //queue.push_back(&block_entry); + //for (auto q = queue.begin(); q != queue.end(); q++) { + // for (auto i = (*q)->execution_traces.begin(); i != (*q)->execution_traces.end(); i++) { + // for (auto j = i->second->blocks.begin(); j != i->second->blocks.end(); j++) { + // auto k = std::find_if(code_fragment.begin(), code_fragment.end(), + // [&j](const CodeFragment::value_type & v)->bool { return v.first.address == j->address; }); + // if (k == code_fragment.end()) { + // code_fragment.push_back(std::make_pair(*j, std::vector())); + // k = code_fragment.end() - 1; + // } + + // if ((j + 1) != i->second->blocks.end()) { 
+ // auto l = std::find(k->second.begin(), k->second.end(), *(j + 1)); + // if (l == k->second.end()) { + // k->second.push_back(*(j + 1)); + // } + // } + + // if (force_inline && j->type == BlockId::Type::Normal) { + // auto block_i = m_block_table.find(j->address); + // if (block_i != m_block_table.end()) { + // if (std::find(queue.begin(), queue.end(), block_i->second) == queue.end()) { + // queue.push_back(&(block_i->second)); + // } + // } + // } + // } + // } + //} + + return code_fragment; } std::shared_ptr RecompilationEngine::GetInstance() { if (s_the_instance == nullptr) { std::lock_guard lock(s_mutex); - s_the_instance = std::shared_ptr(new RecompilationEngine()); + s_the_instance = new RecompilationEngine(); } - return s_the_instance; + return std::shared_ptr(s_the_instance); } -Tracer::Tracer() { +Tracer::Tracer() + : m_recompilation_engine(RecompilationEngine::GetInstance()) { m_trace.reserve(1000); m_stack.reserve(100); } @@ -4774,27 +4950,60 @@ Tracer::~Tracer() { Terminate(); } -void Tracer::Trace(BranchType branch_type, u32 address) { +void Tracer::Trace(TraceType trace_type, u32 arg1, u32 arg2) { ExecutionTrace * execution_trace = nullptr; BlockId block_id; int function; - block_id.address = address; - block_id.type = branch_type; - switch (branch_type) { - case FunctionCall: + switch (trace_type) { + case TraceType::CallFunction: + // arg1 is address of the function + block_id.address = arg1; + block_id.type = BlockId::Type::FunctionCall; + m_trace.push_back(block_id); + break; + case TraceType::EnterFunction: + // arg1 is address. + block_id.address = arg1; + block_id.type = BlockId::Type::Normal; m_stack.push_back((u32)m_trace.size()); m_trace.push_back(block_id); break; - case Block: + case TraceType::ExitFromCompiledFunction: + // arg1 is address of function. + // arg2 is the address of the exit block. 
+ block_id.address = arg1; + block_id.type = BlockId::Type::Normal; + m_stack.push_back((u32)m_trace.size()); + m_trace.push_back(block_id); + + block_id.address = arg2; + block_id.type = BlockId::Type::Exit; + m_trace.push_back(block_id); + break; + case TraceType::Return: + // No args used + function = m_stack.back(); + m_stack.pop_back(); + + execution_trace = new ExecutionTrace(); + execution_trace->type = ExecutionTrace::Type::Linear; + execution_trace->function_address = m_trace[function].address; + execution_trace->previous_block_address = 0; + std::copy(m_trace.begin() + function, m_trace.end(), std::back_inserter(execution_trace->blocks)); + m_trace.erase(m_trace.begin() + function, m_trace.end()); + break; + case TraceType::EnterBlock: + // arg1 is address. Other args are not used. function = m_stack.back(); for (int i = (int)m_trace.size() - 1; i >= function; i--) { - if (m_trace[i].address == address) { + if (m_trace[i].address == arg1 && m_trace[i].type == BlockId::Type::Normal) { // Found a loop within the current function - execution_trace = new ExecutionTrace(); - execution_trace->type = ExecutionTrace::Loop; - execution_trace->function_address = m_trace[function].address; - execution_trace->blocks.insert(execution_trace->blocks.begin(), m_trace.begin() + i, m_trace.end()); + execution_trace = new ExecutionTrace(); + execution_trace->type = ExecutionTrace::Type::Loop; + execution_trace->function_address = m_trace[function].address; + execution_trace->previous_block_address = i == function ? 
0 : m_trace[i - 1].address; + std::copy(m_trace.begin() + i, m_trace.end(), std::back_inserter(execution_trace->blocks)); m_trace.erase(m_trace.begin() + i + 1, m_trace.end()); break; } @@ -4802,20 +5011,16 @@ void Tracer::Trace(BranchType branch_type, u32 address) { if (!execution_trace) { // A loop was not found + block_id.address = arg1; + block_id.type = BlockId::Type::Normal; m_trace.push_back(block_id); } break; - case Return: - function = m_stack.back(); - m_stack.pop_back(); - - execution_trace = new ExecutionTrace(); - execution_trace->function_address = m_trace[function].address; - execution_trace->type = ExecutionTrace::Linear; - execution_trace->blocks.insert(execution_trace->blocks.begin(), m_trace.begin() + function, m_trace.end()); - m_trace.erase(m_trace.begin() + function + 1, m_trace.end()); - break; - case None: + case TraceType::ExitFromCompiledBlock: + // arg1 is address of the exit block. + block_id.address = arg1; + block_id.type = BlockId::Type::Exit; + m_trace.push_back(block_id); break; default: assert(0); @@ -4823,14 +5028,17 @@ void Tracer::Trace(BranchType branch_type, u32 address) { } if (execution_trace) { - auto s = fmt::Format("Trace: 0x%08X, %s -> ", execution_trace->function_address, execution_trace->type == ExecutionTrace::Loop ? "Loop" : "Linear"); - for (auto i = 0; i < execution_trace->blocks.size(); i++) { - s += fmt::Format("0x%08X ", execution_trace->blocks[i]); + auto s = fmt::Format("Trace: 0x%08X, 0x%08X, %s -> ", execution_trace->function_address, execution_trace->previous_block_address, + execution_trace->type == ExecutionTrace::Type::Loop ? "Loop" : "Linear"); + for (auto i = 0; i < execution_trace->blocks.size(); i++) {; + s += fmt::Format("%c:0x%08X ", + execution_trace->blocks[i].type == BlockId::Type::Normal ? 'N' : + execution_trace->blocks[i].type == BlockId::Type::FunctionCall ? 
'F' : 'E', + execution_trace->blocks[i].address); } LOG_NOTICE(PPU, s.c_str()); - delete execution_trace; - // TODO: Notify recompilation engine + //m_recompilation_engine->NotifyTrace(execution_trace); } } @@ -4842,36 +5050,27 @@ ppu_recompiler_llvm::ExecutionEngine::ExecutionEngine(PPUThread & ppu) : m_ppu(ppu) , m_interpreter(new PPUInterpreter(ppu)) , m_decoder(m_interpreter) - , m_last_branch_type(FunctionCall) , m_last_cache_clear_time(std::chrono::high_resolution_clock::now()) - , m_recompiler_revision(0) , m_recompilation_engine(RecompilationEngine::GetInstance()) { + m_executable_lookup = m_recompilation_engine->GetExecutableLookup(); } ppu_recompiler_llvm::ExecutionEngine::~ExecutionEngine() { - for (auto iter = m_address_to_compiled_code_fragment.begin(); iter != m_address_to_compiled_code_fragment.end(); iter++) { - m_recompilation_engine->ReleaseCompiledCodeFragment(iter->second.first); - } } u8 ppu_recompiler_llvm::ExecutionEngine::DecodeMemory(const u32 address) { + ExecuteFunction(this, &m_ppu, m_interpreter, &m_tracer); + return 0; +} + +void ppu_recompiler_llvm::ExecutionEngine::RemoveUnusedEntriesFromCache() { auto now = std::chrono::high_resolution_clock::now(); - if (std::chrono::duration_cast(now - m_last_cache_clear_time).count() > 10000) { - bool clear_all = false; - - u32 revision = m_recompilation_engine->GetCurrentRevision(); - if (m_recompiler_revision != revision) { - m_recompiler_revision = revision; - clear_all = true; - } - - for (auto i = m_address_to_compiled_code_fragment.begin(); i != m_address_to_compiled_code_fragment.end();) { + for (auto i = m_address_to_ordinal.begin(); i != m_address_to_ordinal.end();) { auto tmp = i; i++; - if (tmp->second.second == 0 || clear_all) { - m_address_to_compiled_code_fragment.erase(tmp); - m_recompilation_engine->ReleaseCompiledCodeFragment(tmp->second.first); + if (tmp->second.second == 0) { + m_address_to_ordinal.erase(tmp); } else { tmp->second.second = 0; } @@ -4879,47 +5078,98 @@ u8 
ppu_recompiler_llvm::ExecutionEngine::DecodeMemory(const u32 address) { m_last_cache_clear_time = now; } +} - auto i = m_address_to_compiled_code_fragment.find(address); - if (i == m_address_to_compiled_code_fragment.end()) { - auto compiled_code_fragment = m_recompilation_engine->GetCompiledCodeFragment(address); - if (compiled_code_fragment) { - i = m_address_to_compiled_code_fragment.insert(m_address_to_compiled_code_fragment.end(), std::make_pair(address, std::make_pair(compiled_code_fragment, 0))); +Executable ppu_recompiler_llvm::ExecutionEngine::GetExecutable(u32 address, Executable default_executable) { + // Find the ordinal for the specified address and insert it to the cache + auto i = m_address_to_ordinal.find(address); + if (i == m_address_to_ordinal.end()) { + auto ordinal = m_recompilation_engine->GetOrdinal(address); + if (ordinal != 0xFFFFFFFF) { + i = m_address_to_ordinal.insert(m_address_to_ordinal.end(), std::make_pair(address, std::make_pair(ordinal, 0))); } } - u8 ret = 0; - if (i != m_address_to_compiled_code_fragment.end()) { - m_last_branch_type = None; + Executable executable = default_executable; + if (i != m_address_to_ordinal.end()) { i->second.second++; - i->second.first(&m_ppu, m_interpreter); - } else { - if (m_last_branch_type != None) { - m_tracer.Trace(m_last_branch_type, address); - } - - ret = m_decoder.DecodeMemory(address); - m_last_branch_type = m_ppu.m_is_branch ? 
GetBranchTypeFromInstruction(vm::read32(address)) : None; + executable = m_executable_lookup[i->second.first]; } - return ret; + RemoveUnusedEntriesFromCache(); + return executable; +} + +u64 ppu_recompiler_llvm::ExecutionEngine::ExecuteFunction(ExecutionEngine * execution_engine, PPUThread * ppu_state, PPUInterpreter * interpreter, Tracer * tracer) { + tracer->Trace(Tracer::TraceType::EnterFunction, ppu_state->PC, 0); + return ExecuteTillReturn(execution_engine, ppu_state, interpreter, tracer); +} + +u64 ppu_recompiler_llvm::ExecutionEngine::ExecuteTillReturn(ExecutionEngine * execution_engine, PPUThread * ppu_state, PPUInterpreter * interpreter, Tracer * tracer) { + bool terminate = false; + + while (!terminate) { + auto instruction = re32(vm::get_ref(ppu_state->PC)); + execution_engine->m_decoder.Decode(instruction); + auto is_branch = ppu_state->m_is_branch; + ppu_state->NextPc(4); + + if (is_branch) { + Executable executable; + auto branch_type = GetBranchTypeFromInstruction(instruction); + + switch (branch_type) { + case BranchType::Return: + tracer->Trace(Tracer::TraceType::Return, 0, 0); + terminate = true; + break; + case BranchType::FunctionCall: + tracer->Trace(Tracer::TraceType::CallFunction, ppu_state->PC, 0); + executable = execution_engine->GetExecutable(ppu_state->PC, ExecuteFunction); + executable(execution_engine, ppu_state, interpreter, tracer); + // Fallthrough + case BranchType::LocalBranch: + tracer->Trace(Tracer::TraceType::EnterBlock, ppu_state->PC, 0); + executable = execution_engine->GetExecutable(ppu_state->PC, nullptr); + if (executable != nullptr) { + auto exit_block = executable(execution_engine, ppu_state, interpreter, tracer); + if (exit_block) { + tracer->Trace(Tracer::TraceType::ExitFromCompiledBlock, (u32)exit_block, 0); + } else { + tracer->Trace(Tracer::TraceType::Return, 0, 0); + terminate = true; + } + } + break; + default: + assert(0); + break; + } + } + } + + return 0; } BranchType 
ppu_recompiler_llvm::GetBranchTypeFromInstruction(u32 instruction) { - auto type = BranchType::None; + auto type = BranchType::NonBranch; auto field1 = instruction >> 26; auto lk = instruction & 1; if (field1 == 16 || field1 == 18) { - type = lk ? FunctionCall : Block; + type = lk ? BranchType::FunctionCall : BranchType::LocalBranch; } else if (field1 == 19) { u32 field2 = (instruction >> 1) & 0x3FF; if (field2 == 16) { - type = lk ? FunctionCall : Return; + type = lk ? BranchType::FunctionCall : BranchType::Return; } else if (field2 == 528) { - type = lk ? FunctionCall : Block; + type = lk ? BranchType::FunctionCall : BranchType::LocalBranch; } } return type; } + +ExecutionTraceId ppu_recompiler_llvm::GetExecutionTraceId(const ExecutionTrace * execution_trace) { + return 0; +} diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h index a93da8021b..c3e4511da2 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.h +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h @@ -12,34 +12,44 @@ #include "llvm/PassManager.h" namespace ppu_recompiler_llvm { - /// Branch type - enum BranchType { - None, + class Compiler; + class RecompilationEngine; + class Tracer; + class ExecutionEngine; + + enum class BranchType { + NonBranch, + LocalBranch, FunctionCall, - Block, Return, }; /// Unique id of a block - union BlockId { - u64 block_id; + struct BlockId { + /// Address of the block + u32 address; - struct { - /// Address of the block - u32 address; - - /// The type of the block - BranchType type; - }; + /// The type of the block + enum class Type { + FunctionCall, + Normal, + Exit, + } type; }; + /// Uniquely identifies an execution trace + typedef u64 ExecutionTraceId; + /// An execution trace. 
struct ExecutionTrace { - /// The function in which this trace was found + /// The function to which this trace belongs u32 function_address; + /// The address of the block that came before this trace + u32 previous_block_address; + /// Execution trace type - enum { + enum class Type { Linear, Loop, } type; @@ -51,8 +61,8 @@ namespace ppu_recompiler_llvm { /// A fragment of PPU code. A list of (block, list of next blocks) pairs. typedef std::vector>> CodeFragment; - /// Pointer to a function built by compiling a fragment of PPU code - typedef u64(*CompiledCodeFragment)(PPUThread * ppu_state, PPUInterpreter * interpreter); + /// Pointer to an executable + typedef u64(*Executable)(ExecutionEngine * execution_engine, PPUThread * ppu_state, PPUInterpreter * interpreter, Tracer * tracer); struct PPUState; @@ -86,11 +96,11 @@ namespace ppu_recompiler_llvm { Compiler & operator = (const Compiler & other) = delete; Compiler & operator = (Compiler && other) = delete; - /// Compile a code fragment - CompiledCodeFragment Compile(const std::string & name, const CodeFragment & code_fragment); + /// Compile a code fragment and obtain an executable + Executable Compile(const std::string & name, const CodeFragment & code_fragment); - /// Free a compiled code fragment - void FreeCompiledCodeFragment(CompiledCodeFragment compiled_code_fragment); + /// Free an executable earilier obtained from the Compile function + void FreeCompiledCodeFragment(Executable executable); /// Retrieve compiler stats Stats GetStats(); @@ -502,9 +512,6 @@ namespace ppu_recompiler_llvm { void UNK(const u32 code, const u32 opcode, const u32 gcode) override; private: - /// Map from compiled code fragment to the LLVM function for the code fragment - std::map m_compiled; - /// LLVM context llvm::LLVMContext * m_llvm_context; @@ -716,29 +723,44 @@ namespace ppu_recompiler_llvm { static void InitRotateMask(); }; - /// Analyses execution traces and finds hot paths - class Profiler { - - }; - - class 
RecompilationEngine { + class RecompilationEngine : public ThreadBase { public: - virtual ~RecompilationEngine() = default; + virtual ~RecompilationEngine(); - /// Get the compiled code fragment for the specified address - CompiledCodeFragment GetCompiledCodeFragment(u32 address); + /// Get the ordinal for the specified address + u32 GetOrdinal(u32 address); - /// Release a compiled code fragment earlier obtained through GetCompiledCodeFragment - void ReleaseCompiledCodeFragment(CompiledCodeFragment compiled_code_fragment); + /// Get the executable lookup table + Executable * GetExecutableLookup() const; - /// Get the current revision - u32 GetCurrentRevision(); + /// Notify the recompilation engine about a newly detected trace. It takes ownership of the trace. + void NotifyTrace(ExecutionTrace * execution_trace); + + void Task() override; /// Get a pointer to the instance of this class static std::shared_ptr GetInstance(); private: - RecompilationEngine() = default; + /// An entry in the block table + struct BlockEntry { + BlockEntry(); + ~BlockEntry(); + + /// Number of times this block was hit + u32 num_hits; + + /// Execution traces starting at this block + std::unordered_map execution_traces; + + /// Indicates whether the block has been compiled or not + bool is_compiled; + }; + + /// Block table type. Key is block address. + typedef std::unordered_map BlockTable; + + RecompilationEngine(); RecompilationEngine(const RecompilationEngine & other) = delete; RecompilationEngine(RecompilationEngine && other) = delete; @@ -746,16 +768,47 @@ namespace ppu_recompiler_llvm { RecompilationEngine & operator = (const RecompilationEngine & other) = delete; RecompilationEngine & operator = (RecompilationEngine && other) = delete; + /// Process an execution trace. Returns an iterator to a block table entry if the block should be compiled. 
+ BlockTable::iterator ProcessExecutionTrace(ExecutionTrace * execution_trace); + + /// Compile a block + void CompileBlock(BlockTable::iterator block_i); + + /// Build code fragment from a block + CodeFragment BuildCodeFragmentFromBlock(const BlockEntry & block_entry, bool force_inline); + + /// Lock for accessing m_pending_execution_traces. TODO: Eliminate this and use a lock-free queue. + std::mutex m_pending_execution_traces_lock; + + /// Queue of execution traces pending prcessing + std::list m_pending_execution_traces; + + /// Block table + BlockTable m_block_table; + + /// PPU Compiler + Compiler m_compiler; + /// Mutex used to prevent multiple creation static std::mutex s_mutex; /// The instance - static std::shared_ptr s_the_instance; + static RecompilationEngine * s_the_instance; }; /// Finds interesting execution sequences class Tracer { public: + /// Trace type + enum class TraceType { + CallFunction, + EnterFunction, + ExitFromCompiledFunction, + Return, + EnterBlock, + ExitFromCompiledBlock, + }; + Tracer(); Tracer(const Tracer & other) = delete; @@ -766,8 +819,8 @@ namespace ppu_recompiler_llvm { Tracer & operator = (const Tracer & other) = delete; Tracer & operator = (Tracer && other) = delete; - /// Notify the tracer that a branch was encountered - void Trace(BranchType branch_type, u32 address); + /// Notify the tracer + void Trace(TraceType trace_type, u32 arg1, u32 arg2); /// Notify the tracer that the execution sequence is being terminated. 
void Terminate(); @@ -778,6 +831,9 @@ namespace ppu_recompiler_llvm { /// Call stack std::vector m_stack; + + /// Recompilation engine + std::shared_ptr m_recompilation_engine; }; /// PPU execution engine @@ -809,24 +865,36 @@ namespace ppu_recompiler_llvm { /// Execution tracer Tracer m_tracer; - /// Set to true if the last executed instruction was a branch - BranchType m_last_branch_type; + /// Executable lookup table + Executable * m_executable_lookup; - /// The time at which the m_address_to_compiled_code_fragment cache was last cleared + /// The time at which the m_address_to_ordinal cache was last cleared std::chrono::high_resolution_clock::time_point m_last_cache_clear_time; - /// The revision of the recompiler to which this thread is synced - u32 m_recompiler_revision; - - /// Address to compiled code fragmnet lookup. Key is address. Data is the pair (compiled code fragment, times hit). - std::unordered_map> m_address_to_compiled_code_fragment; + /// Address to ordinal lookup. Key is address. Data is the pair (ordinal, times hit). 
+ std::unordered_map> m_address_to_ordinal; /// Recompilation engine std::shared_ptr m_recompilation_engine; + + /// Remove unused entries from the m_address_to_ordinal cache + void RemoveUnusedEntriesFromCache(); + + /// Get the executable for the specified address + Executable GetExecutable(u32 address, Executable default_executable); + + /// Execute a function + static u64 ExecuteFunction(ExecutionEngine * execution_engine, PPUThread * ppu_state, PPUInterpreter * interpreter, Tracer * tracer); + + /// Execute till the current function returns + static u64 ExecuteTillReturn(ExecutionEngine * execution_engine, PPUThread * ppu_state, PPUInterpreter * interpreter, Tracer * tracer); }; - // Get the branch type from a branch instruction + /// Get the branch type from a branch instruction BranchType GetBranchTypeFromInstruction(u32 instruction); + + /// Get the execution trace id of an execution trace + ExecutionTraceId GetExecutionTraceId(const ExecutionTrace * execution_trace); } #endif // PPU_LLVM_RECOMPILER_H From 7c3c5ae08ed02f22f22c8ddf5b7ee93c6c53c78b Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Wed, 5 Nov 2014 01:01:20 +0530 Subject: [PATCH 04/27] Construct CFG from execution traces --- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 522 ++++++++++++++------------- rpcs3/Emu/Cell/PPULLVMRecompiler.h | 110 ++++-- rpcs3/stdafx.h | 1 + 3 files changed, 354 insertions(+), 279 deletions(-) diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 952eed981c..6adfdaed00 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -76,124 +76,124 @@ Compiler::~Compiler() { delete m_llvm_context; } -Executable Compiler::Compile(const std::string & name, const CodeFragment & code_fragment) { - assert(!name.empty()); - assert(!code_fragment.empty()); +//Executable Compiler::Compile(const std::string & name, const CodeFragment & code_fragment) { +// assert(!name.empty()); +// 
assert(!code_fragment.empty()); +// +// auto compilation_start = std::chrono::high_resolution_clock::now(); +// +// // Create the function +// m_current_function = (Function *)m_module->getOrInsertFunction(name, m_ir_builder->getVoidTy(), +// m_ir_builder->getInt8PtrTy() /*ppu_state*/, +// m_ir_builder->getInt8PtrTy() /*interpreter*/, +// m_ir_builder->getInt8PtrTy() /*tracer*/, nullptr); +// m_current_function->setCallingConv(CallingConv::X86_64_Win64); +// auto arg_i = m_current_function->arg_begin(); +// arg_i->setName("ppu_state"); +// (++arg_i)->setName("interpreter"); +// (++arg_i)->setName("tracer"); +// +// // Create the entry block +// GetBasicBlockFromAddress(0, m_current_function, true); +// +// // Create basic blocks for each instruction +// for (auto i = code_fragment.begin(); i != code_fragment.end(); i++) { +// u32 address = i->first.address; +// while (1) { +// GetBasicBlockFromAddress(address, m_current_function, true); +// +// u32 instr = vm::read32(address); +// if (IsBranchInstruction(instr)) { +// break; +// } +// +// address += 4; +// } +// } +// +// // Add code to notify the tracer about this function and branch to the first instruction +// m_ir_builder->SetInsertPoint(GetBasicBlockFromAddress(0, m_current_function)); +// //Call("Tracer.Trace", &Tracer::Trace, *arg_i, +// // m_ir_builder->getInt32(code_fragment[0].first.type == Function ? 
FunctionCall : Block), +// // m_ir_builder->getInt32(code_fragment[0].first.address)); +// m_ir_builder->CreateBr(GetBasicBlockFromAddress(code_fragment[0].first.address, m_current_function)); +// +// // Convert each block in this code fragment to LLVM IR +// for (auto i = code_fragment.begin(); i != code_fragment.end(); i++) { +// m_current_instruction_address = i->first.address; +// m_current_block_next_blocks = &(i->second); +// auto block = GetBasicBlockFromAddress(m_current_instruction_address, m_current_function); +// m_ir_builder->SetInsertPoint(block); +// +// if (i != code_fragment.begin() && i->first.type == BlockId::Type::FunctionCall) { +// auto ordinal = RecompilationEngine::GetInstance()->GetOrdinal(i->first.address); +// +// } +// +// m_hit_branch_instruction = false; +// while (!m_hit_branch_instruction) { +// if (!block->getInstList().empty()) { +// break; +// } +// +// u32 instr = vm::read32(m_current_instruction_address); +// Decode(instr); +// +// m_current_instruction_address += 4; +// if (!m_hit_branch_instruction) { +// block = GetBasicBlockFromAddress(m_current_instruction_address, m_current_function); +// m_ir_builder->CreateBr(block); +// m_ir_builder->SetInsertPoint(block); +// } +// } +// } +// +// // If the function has an unknown block then add code to notify the tracer +// auto unknown_bb = GetBasicBlockFromAddress(0xFFFFFFFF, m_current_function); +// if (!unknown_bb) { +// m_ir_builder->SetInsertPoint(unknown_bb); +// auto branch_type_i32 = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 1); +// for (auto i = pred_begin(unknown_bb); i != pred_end(unknown_bb); i++) { +// // We assume that the last but one instruction of the predecessor sets the branch type +// auto j = (*i)->rbegin(); +// j--; +// branch_type_i32->addIncoming(&(*j), *i); +// } +// +// //Call("NotifyBranch", &Tracer::NotifyBranch, *arg_i, +// // m_ir_builder->CreateZExtOrTrunc(branch_type_i32, m_ir_builder->getIntNTy(sizeof(Tracer::BranchType) * 8)), GetPc()); +// 
m_ir_builder->CreateRetVoid(); +// } +// +// auto ir_build_end = std::chrono::high_resolution_clock::now(); +// m_stats.ir_build_time += std::chrono::duration_cast(ir_build_end - compilation_start); +// +// // Optimize this function +// m_fpm->run(*m_current_function); +// auto optimize_end = std::chrono::high_resolution_clock::now(); +// m_stats.optimization_time += std::chrono::duration_cast(optimize_end - ir_build_end); +// +// // Translate to machine code +// MachineCodeInfo mci; +// m_execution_engine->runJITOnFunction(m_current_function, &mci); +// auto translate_end = std::chrono::high_resolution_clock::now(); +// m_stats.translation_time += std::chrono::duration_cast(translate_end - optimize_end); +// +// auto compilation_end = std::chrono::high_resolution_clock::now(); +// m_stats.total_time += std::chrono::duration_cast(compilation_end - compilation_start); +// +// //m_compiled[(CompiledCodeFragment)mci.address()] = m_current_function; +// //return (CompiledCodeFragment)mci.address(); +// return nullptr; +//} - auto compilation_start = std::chrono::high_resolution_clock::now(); - - // Create the function - m_current_function = (Function *)m_module->getOrInsertFunction(name, m_ir_builder->getVoidTy(), - m_ir_builder->getInt8PtrTy() /*ppu_state*/, - m_ir_builder->getInt8PtrTy() /*interpreter*/, - m_ir_builder->getInt8PtrTy() /*tracer*/, nullptr); - m_current_function->setCallingConv(CallingConv::X86_64_Win64); - auto arg_i = m_current_function->arg_begin(); - arg_i->setName("ppu_state"); - (++arg_i)->setName("interpreter"); - (++arg_i)->setName("tracer"); - - // Create the entry block - GetBasicBlockFromAddress(0, m_current_function, true); - - // Create basic blocks for each instruction - for (auto i = code_fragment.begin(); i != code_fragment.end(); i++) { - u32 address = i->first.address; - while (1) { - GetBasicBlockFromAddress(address, m_current_function, true); - - u32 instr = vm::read32(address); - if (IsBranchInstruction(instr)) { - break; - } - - 
address += 4; - } - } - - // Add code to notify the tracer about this function and branch to the first instruction - m_ir_builder->SetInsertPoint(GetBasicBlockFromAddress(0, m_current_function)); - //Call("Tracer.Trace", &Tracer::Trace, *arg_i, - // m_ir_builder->getInt32(code_fragment[0].first.type == Function ? FunctionCall : Block), - // m_ir_builder->getInt32(code_fragment[0].first.address)); - m_ir_builder->CreateBr(GetBasicBlockFromAddress(code_fragment[0].first.address, m_current_function)); - - // Convert each block in this code fragment to LLVM IR - for (auto i = code_fragment.begin(); i != code_fragment.end(); i++) { - m_current_instruction_address = i->first.address; - m_current_block_next_blocks = &(i->second); - auto block = GetBasicBlockFromAddress(m_current_instruction_address, m_current_function); - m_ir_builder->SetInsertPoint(block); - - if (i != code_fragment.begin() && i->first.type == BlockId::Type::FunctionCall) { - auto ordinal = RecompilationEngine::GetInstance()->GetOrdinal(i->first.address); - - } - - m_hit_branch_instruction = false; - while (!m_hit_branch_instruction) { - if (!block->getInstList().empty()) { - break; - } - - u32 instr = vm::read32(m_current_instruction_address); - Decode(instr); - - m_current_instruction_address += 4; - if (!m_hit_branch_instruction) { - block = GetBasicBlockFromAddress(m_current_instruction_address, m_current_function); - m_ir_builder->CreateBr(block); - m_ir_builder->SetInsertPoint(block); - } - } - } - - // If the function has an unknown block then add code to notify the tracer - auto unknown_bb = GetBasicBlockFromAddress(0xFFFFFFFF, m_current_function); - if (!unknown_bb) { - m_ir_builder->SetInsertPoint(unknown_bb); - auto branch_type_i32 = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 1); - for (auto i = pred_begin(unknown_bb); i != pred_end(unknown_bb); i++) { - // We assume that the last but one instruction of the predecessor sets the branch type - auto j = (*i)->rbegin(); - j--; - 
branch_type_i32->addIncoming(&(*j), *i); - } - - //Call("NotifyBranch", &Tracer::NotifyBranch, *arg_i, - // m_ir_builder->CreateZExtOrTrunc(branch_type_i32, m_ir_builder->getIntNTy(sizeof(Tracer::BranchType) * 8)), GetPc()); - m_ir_builder->CreateRetVoid(); - } - - auto ir_build_end = std::chrono::high_resolution_clock::now(); - m_stats.ir_build_time += std::chrono::duration_cast(ir_build_end - compilation_start); - - // Optimize this function - m_fpm->run(*m_current_function); - auto optimize_end = std::chrono::high_resolution_clock::now(); - m_stats.optimization_time += std::chrono::duration_cast(optimize_end - ir_build_end); - - // Translate to machine code - MachineCodeInfo mci; - m_execution_engine->runJITOnFunction(m_current_function, &mci); - auto translate_end = std::chrono::high_resolution_clock::now(); - m_stats.translation_time += std::chrono::duration_cast(translate_end - optimize_end); - - auto compilation_end = std::chrono::high_resolution_clock::now(); - m_stats.total_time += std::chrono::duration_cast(compilation_end - compilation_start); - - //m_compiled[(CompiledCodeFragment)mci.address()] = m_current_function; - //return (CompiledCodeFragment)mci.address(); - return nullptr; -} - -void Compiler::FreeCompiledCodeFragment(Executable compiled_code_fragment) { - //auto i = m_compiled.find(compiled_code_fragment); - //if (i != m_compiled.end()) { - // m_execution_engine->freeMachineCodeForFunction(i->second); - // i->second->eraseFromParent(); - //} -} +//void Compiler::FreeCompiledCodeFragment(Executable compiled_code_fragment) { +// //auto i = m_compiled.find(compiled_code_fragment); +// //if (i != m_compiled.end()) { +// // m_execution_engine->freeMachineCodeForFunction(i->second); +// // i->second->eraseFromParent(); +// //} +//} Compiler::Stats Compiler::GetStats() { return m_stats; @@ -4747,19 +4747,8 @@ void Compiler::InitRotateMask() { } } -std::mutex RecompilationEngine::s_mutex; -RecompilationEngine * RecompilationEngine::s_the_instance; - 
-RecompilationEngine::BlockEntry::BlockEntry() - : num_hits(0) - , is_compiled(false) { -} - -RecompilationEngine::BlockEntry::~BlockEntry() { - for (auto i = execution_traces.begin(); i != execution_traces.end(); i++) { - delete i->second; - } -} +std::mutex RecompilationEngine::s_mutex; +std::shared_ptr RecompilationEngine::s_the_instance = nullptr; RecompilationEngine::RecompilationEngine() : ThreadBase("PPU Recompilation Engine") { @@ -4815,10 +4804,7 @@ void RecompilationEngine::Task() { } } - auto block_i = ProcessExecutionTrace(execution_trace); - if (block_i != m_block_table.end()) { - CompileBlock(block_i); - } + ProcessExecutionTrace(*execution_trace); } // TODO: Reduce the priority of the recompilation engine thread @@ -4867,77 +4853,112 @@ void RecompilationEngine::Task() { //log_file << "\nLLVM IR:\n" << *m_module; LOG_NOTICE(PPU, "PPU LLVM Recompilation thread exiting."); + s_the_instance = nullptr; // Can cause deadlock if this is the last instance. Need to fix this. } -RecompilationEngine::BlockTable::iterator RecompilationEngine::ProcessExecutionTrace(ExecutionTrace * execution_trace) { - auto block_i = m_block_table.find(execution_trace->blocks[0].address); - if (block_i == m_block_table.end()) { - // New block - block_i = m_block_table.insert(m_block_table.end(), std::make_pair(execution_trace->blocks[0].address, BlockEntry())); +void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution_trace) { + auto execution_trace_id = GetExecutionTraceId(execution_trace); + auto processed_execution_trace_i = m_processed_execution_traces.find(execution_trace_id); + if (processed_execution_trace_i == m_processed_execution_traces.end()) { + std::vector tmp_block_list; + + auto split_trace = false; + auto block_i = m_block_table.end(); + auto trace_block_i = execution_trace.blocks.begin(); + for (; trace_block_i != execution_trace.blocks.end(); trace_block_i++) { + if (trace_block_i->type == BlockId::Type::Exit) { + block_i = 
m_block_table.end(); + split_trace = true; + } else if (block_i == m_block_table.end()) { + BlockEntry key(trace_block_i->address); + + block_i = m_block_table.find(&key); + if (block_i == m_block_table.end()) { + block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.address)); + } + + (*block_i)->is_function_start = key.address == execution_trace.function_address; + tmp_block_list.push_back(*block_i); + } + + if (block_i != m_block_table.end()) { + BlockId next_block; + if (trace_block_i + 1 != execution_trace.blocks.end()) { + next_block = *(trace_block_i + 1); + } else { + if (!split_trace && execution_trace.type == ExecutionTrace::Type::Loop) { + next_block = *(execution_trace.blocks.begin()); + } else { + next_block.address = 0; + next_block.type = BlockId::Type::Exit; + } + } + + UpdateControlFlowGraph((*block_i)->cfg, *trace_block_i, next_block); + } + } + + processed_execution_trace_i = m_processed_execution_traces.insert(m_processed_execution_traces.end(), std::make_pair(execution_trace_id, std::move(tmp_block_list))); } - block_i->second.num_hits++; - auto execution_trace_id = GetExecutionTraceId(execution_trace); - auto execution_trace_i = block_i->second.execution_traces.find(execution_trace_id); - if (execution_trace_i == block_i->second.execution_traces.end()) { - block_i->second.execution_traces.insert(std::make_pair(execution_trace_id, execution_trace)); + for (auto i = processed_execution_trace_i->second.begin(); i != processed_execution_trace_i->second.end(); i++) { + if (!(*i)->is_compiled) { + (*i)->num_hits++; + if ((*i)->num_hits >= 1) { // TODO: Make this configurable + CompileBlock(*(*i), false); + (*i)->is_compiled = true; + } + } } - if (!block_i->second.is_compiled && block_i->second.num_hits > 1000) { // TODO: Make threshold configurable - return block_i; - } - - return m_block_table.end(); + std::remove_if(processed_execution_trace_i->second.begin(), processed_execution_trace_i->second.end(), [](const BlockEntry * 
b)->bool { return b->is_compiled; }); } -void RecompilationEngine::CompileBlock(BlockTable::iterator block_i) { - auto code_fragment = BuildCodeFragmentFromBlock(block_i->second, false); +void RecompilationEngine::UpdateControlFlowGraph(ControlFlowGraph & cfg, BlockId block, BlockId next_block) { + if (block.type == BlockId::Type::Exit && next_block.type == BlockId::Type::Exit) { + return; + } + + if (block.type == BlockId::Type::FunctionCall) { + return; + } + + auto block_i = std::find_if(cfg.begin(), cfg.end(), [&block](const ControlFlowGraph::value_type & v)->bool { return v.first == block.address; }); + if (block.type == BlockId::Type::Normal && block_i == cfg.end()) { + block_i = cfg.insert(cfg.end(), std::make_pair(block.address, std::vector())); + } + + if (block_i != cfg.end() && next_block.address && next_block.type != BlockId::Type::Exit) { + auto next_block_i = std::find(block_i->second.begin(), block_i->second.end(), next_block); + if (next_block_i == block_i->second.end()) { + block_i->second.push_back(next_block); + } + } } -CodeFragment RecompilationEngine::BuildCodeFragmentFromBlock(const BlockEntry & block_entry, bool force_inline) { - CodeFragment code_fragment; - //std::vector queue; +void RecompilationEngine::CompileBlock(const BlockEntry & block_entry, bool inline_referenced_blocks) { + std::string cfg_str; + for (auto i = block_entry.cfg.begin(); i != block_entry.cfg.end(); i++) { + cfg_str += fmt::Format("0x%08X ->", i->first); + for (auto j = i->second.begin(); j != i->second.end(); j++) { + cfg_str += " " + j->ToString(); + } - //queue.push_back(&block_entry); - //for (auto q = queue.begin(); q != queue.end(); q++) { - // for (auto i = (*q)->execution_traces.begin(); i != (*q)->execution_traces.end(); i++) { - // for (auto j = i->second->blocks.begin(); j != i->second->blocks.end(); j++) { - // auto k = std::find_if(code_fragment.begin(), code_fragment.end(), - // [&j](const CodeFragment::value_type & v)->bool { return v.first.address == 
j->address; }); - // if (k == code_fragment.end()) { - // code_fragment.push_back(std::make_pair(*j, std::vector())); - // k = code_fragment.end() - 1; - // } - - // if ((j + 1) != i->second->blocks.end()) { - // auto l = std::find(k->second.begin(), k->second.end(), *(j + 1)); - // if (l == k->second.end()) { - // k->second.push_back(*(j + 1)); - // } - // } - - // if (force_inline && j->type == BlockId::Type::Normal) { - // auto block_i = m_block_table.find(j->address); - // if (block_i != m_block_table.end()) { - // if (std::find(queue.begin(), queue.end(), block_i->second) == queue.end()) { - // queue.push_back(&(block_i->second)); - // } - // } - // } - // } - // } - //} - - return code_fragment; + if (i != (block_entry.cfg.end() - 1)) { + cfg_str += "\n"; + } + } + LOG_NOTICE(PPU, "Compile: %c:0x%08X, NumHits=%u\n%s", block_entry.is_function_start ? 'F' : 'N', block_entry.address, block_entry.num_hits, cfg_str.c_str()); } std::shared_ptr RecompilationEngine::GetInstance() { + std::lock_guard lock(s_mutex); + if (s_the_instance == nullptr) { - std::lock_guard lock(s_mutex); - s_the_instance = new RecompilationEngine(); + s_the_instance = std::shared_ptr(new RecompilationEngine()); } - return std::shared_ptr(s_the_instance); + return s_the_instance; } Tracer::Tracer() @@ -4986,10 +5007,9 @@ void Tracer::Trace(TraceType trace_type, u32 arg1, u32 arg2) { function = m_stack.back(); m_stack.pop_back(); - execution_trace = new ExecutionTrace(); - execution_trace->type = ExecutionTrace::Type::Linear; - execution_trace->function_address = m_trace[function].address; - execution_trace->previous_block_address = 0; + execution_trace = new ExecutionTrace(); + execution_trace->type = ExecutionTrace::Type::Linear; + execution_trace->function_address = m_trace[function].address; std::copy(m_trace.begin() + function, m_trace.end(), std::back_inserter(execution_trace->blocks)); m_trace.erase(m_trace.begin() + function, m_trace.end()); break; @@ -4999,10 +5019,9 @@ void 
Tracer::Trace(TraceType trace_type, u32 arg1, u32 arg2) { for (int i = (int)m_trace.size() - 1; i >= function; i--) { if (m_trace[i].address == arg1 && m_trace[i].type == BlockId::Type::Normal) { // Found a loop within the current function - execution_trace = new ExecutionTrace(); - execution_trace->type = ExecutionTrace::Type::Loop; - execution_trace->function_address = m_trace[function].address; - execution_trace->previous_block_address = i == function ? 0 : m_trace[i - 1].address; + execution_trace = new ExecutionTrace(); + execution_trace->type = ExecutionTrace::Type::Loop; + execution_trace->function_address = m_trace[function].address; std::copy(m_trace.begin() + i, m_trace.end(), std::back_inserter(execution_trace->blocks)); m_trace.erase(m_trace.begin() + i + 1, m_trace.end()); break; @@ -5021,6 +5040,18 @@ void Tracer::Trace(TraceType trace_type, u32 arg1, u32 arg2) { block_id.address = arg1; block_id.type = BlockId::Type::Exit; m_trace.push_back(block_id); + + if (arg1 == 0) { + // Return from function + function = m_stack.back(); + m_stack.pop_back(); + + execution_trace = new ExecutionTrace(); + execution_trace->type = ExecutionTrace::Type::Linear; + execution_trace->function_address = m_trace[function].address; + std::copy(m_trace.begin() + function, m_trace.end(), std::back_inserter(execution_trace->blocks)); + m_trace.erase(m_trace.begin() + function, m_trace.end()); + } break; default: assert(0); @@ -5028,17 +5059,8 @@ void Tracer::Trace(TraceType trace_type, u32 arg1, u32 arg2) { } if (execution_trace) { - auto s = fmt::Format("Trace: 0x%08X, 0x%08X, %s -> ", execution_trace->function_address, execution_trace->previous_block_address, - execution_trace->type == ExecutionTrace::Type::Loop ? "Loop" : "Linear"); - for (auto i = 0; i < execution_trace->blocks.size(); i++) {; - s += fmt::Format("%c:0x%08X ", - execution_trace->blocks[i].type == BlockId::Type::Normal ? 'N' : - execution_trace->blocks[i].type == BlockId::Type::FunctionCall ? 
'F' : 'E', - execution_trace->blocks[i].address); - } - - LOG_NOTICE(PPU, s.c_str()); - //m_recompilation_engine->NotifyTrace(execution_trace); + LOG_NOTICE(PPU, "Trace: %s", execution_trace->ToString().c_str()); + m_recompilation_engine->NotifyTrace(execution_trace); } } @@ -5056,6 +5078,7 @@ ppu_recompiler_llvm::ExecutionEngine::ExecutionEngine(PPUThread & ppu) } ppu_recompiler_llvm::ExecutionEngine::~ExecutionEngine() { + } u8 ppu_recompiler_llvm::ExecutionEngine::DecodeMemory(const u32 address) { @@ -5107,44 +5130,53 @@ u64 ppu_recompiler_llvm::ExecutionEngine::ExecuteFunction(ExecutionEngine * exec u64 ppu_recompiler_llvm::ExecutionEngine::ExecuteTillReturn(ExecutionEngine * execution_engine, PPUThread * ppu_state, PPUInterpreter * interpreter, Tracer * tracer) { bool terminate = false; + bool returned = false; - while (!terminate) { - auto instruction = re32(vm::get_ref(ppu_state->PC)); - execution_engine->m_decoder.Decode(instruction); - auto is_branch = ppu_state->m_is_branch; - ppu_state->NextPc(4); + while (!terminate && !Emu.IsStopped()) { + if (Emu.IsPaused()) { + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + continue; + } - if (is_branch) { - Executable executable; - auto branch_type = GetBranchTypeFromInstruction(instruction); + BranchType branch_type; + if (!returned) { + auto instruction = re32(vm::get_ref(ppu_state->PC)); + execution_engine->m_decoder.Decode(instruction); + branch_type = ppu_state->m_is_branch ? 
GetBranchTypeFromInstruction(instruction) : BranchType::NonBranch; + ppu_state->NextPc(4); + } else { + returned = false; + branch_type = BranchType::LocalBranch; + } - switch (branch_type) { - case BranchType::Return: - tracer->Trace(Tracer::TraceType::Return, 0, 0); - terminate = true; - break; - case BranchType::FunctionCall: - tracer->Trace(Tracer::TraceType::CallFunction, ppu_state->PC, 0); - executable = execution_engine->GetExecutable(ppu_state->PC, ExecuteFunction); - executable(execution_engine, ppu_state, interpreter, tracer); - // Fallthrough - case BranchType::LocalBranch: - tracer->Trace(Tracer::TraceType::EnterBlock, ppu_state->PC, 0); - executable = execution_engine->GetExecutable(ppu_state->PC, nullptr); - if (executable != nullptr) { - auto exit_block = executable(execution_engine, ppu_state, interpreter, tracer); - if (exit_block) { - tracer->Trace(Tracer::TraceType::ExitFromCompiledBlock, (u32)exit_block, 0); - } else { - tracer->Trace(Tracer::TraceType::Return, 0, 0); - terminate = true; - } + Executable executable; + switch (branch_type) { + case BranchType::Return: + tracer->Trace(Tracer::TraceType::Return, 0, 0); + terminate = true; + break; + case BranchType::FunctionCall: + tracer->Trace(Tracer::TraceType::CallFunction, ppu_state->PC, 0); + executable = execution_engine->GetExecutable(ppu_state->PC, ExecuteFunction); + executable(execution_engine, ppu_state, interpreter, tracer); + returned = true; + break; + case BranchType::LocalBranch: + tracer->Trace(Tracer::TraceType::EnterBlock, ppu_state->PC, 0); + executable = execution_engine->GetExecutable(ppu_state->PC, nullptr); + if (executable != nullptr) { + auto exit_block = executable(execution_engine, ppu_state, interpreter, tracer); + tracer->Trace(Tracer::TraceType::ExitFromCompiledBlock, (u32)exit_block, 0); + if (exit_block == 0) { + terminate = true; } - break; - default: - assert(0); - break; } + break; + case BranchType::NonBranch: + break; + default: + assert(0); + break; } } @@ 
-5170,6 +5202,12 @@ BranchType ppu_recompiler_llvm::GetBranchTypeFromInstruction(u32 instruction) { return type; } -ExecutionTraceId ppu_recompiler_llvm::GetExecutionTraceId(const ExecutionTrace * execution_trace) { - return 0; +ExecutionTraceId ppu_recompiler_llvm::GetExecutionTraceId(const ExecutionTrace & execution_trace) { + ExecutionTraceId id = 0; + + for (auto i = execution_trace.blocks.begin(); i != execution_trace.blocks.end(); i++) { + id = (id << 8) ^ ((u64)i->address << 32 | _byteswap_ulong((u64)i->address)); + } + + return id; } diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h index c3e4511da2..5f27cd540e 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.h +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h @@ -16,6 +16,7 @@ namespace ppu_recompiler_llvm { class RecompilationEngine; class Tracer; class ExecutionEngine; + struct PPUState; enum class BranchType { NonBranch, @@ -35,8 +36,19 @@ namespace ppu_recompiler_llvm { Normal, Exit, } type; + + bool operator == (const BlockId & other) const { + return (address == other.address && type == other.type); + } + + std::string ToString() const { + return fmt::Format("%c:0x%08X", type == BlockId::Type::Normal ? 'N' : type == BlockId::Type::FunctionCall ? 'F' : 'E', address); + } }; + /// Control flow graph of a block. A list of (block address, list of next blocks) pairs. + typedef std::vector>> ControlFlowGraph; + /// Uniquely identifies an execution trace typedef u64 ExecutionTraceId; @@ -45,9 +57,6 @@ namespace ppu_recompiler_llvm { /// The function to which this trace belongs u32 function_address; - /// The address of the block that came before this trace - u32 previous_block_address; - /// Execution trace type enum class Type { Linear, @@ -56,16 +65,58 @@ namespace ppu_recompiler_llvm { /// Sequence of blocks enountered in this trace std::vector blocks; - }; - /// A fragment of PPU code. A list of (block, list of next blocks) pairs. 
- typedef std::vector>> CodeFragment; + std::string ToString() const { + auto s = fmt::Format("0x%08X %s ->", function_address, type == ExecutionTrace::Type::Loop ? "Loop" : "Linear"); + for (auto i = 0; i < blocks.size(); i++) { + s += " " + blocks[i].ToString(); + } + + return s; + } + }; /// Pointer to an executable typedef u64(*Executable)(ExecutionEngine * execution_engine, PPUThread * ppu_state, PPUInterpreter * interpreter, Tracer * tracer); - struct PPUState; + /// An entry in the block table + struct BlockEntry { + /// Address of the block + u32 address; + /// Number of times this block was hit + u32 num_hits; + + /// The CFG for this block + ControlFlowGraph cfg; + + /// Indicates whether the block has been compiled or not + bool is_compiled; + + /// Indicates whether the block is the first block of a function or not + bool is_function_start; + + BlockEntry(u32 addr) + : address(addr) + , num_hits(0) + , is_compiled(false) { + } + + bool operator == (const BlockEntry & other) const { + return address == other.address; + } + }; +} + +namespace std { + template<> struct hash { + size_t operator()(const ppu_recompiler_llvm::BlockEntry * e) const { + return e->address; + } + }; +} + +namespace ppu_recompiler_llvm { /// PPU compiler that uses LLVM for code generation and optimization class Compiler : protected PPUOpcodes, protected PPCDecoder { public: @@ -97,10 +148,10 @@ namespace ppu_recompiler_llvm { Compiler & operator = (Compiler && other) = delete; /// Compile a code fragment and obtain an executable - Executable Compile(const std::string & name, const CodeFragment & code_fragment); + //Executable Compile(const std::string & name, const CodeFragment & code_fragment); /// Free an executable earilier obtained from the Compile function - void FreeCompiledCodeFragment(Executable executable); + //void FreeCompiledCodeFragment(Executable executable); /// Retrieve compiler stats Stats GetStats(); @@ -742,24 +793,6 @@ namespace ppu_recompiler_llvm { static 
std::shared_ptr GetInstance(); private: - /// An entry in the block table - struct BlockEntry { - BlockEntry(); - ~BlockEntry(); - - /// Number of times this block was hit - u32 num_hits; - - /// Execution traces starting at this block - std::unordered_map execution_traces; - - /// Indicates whether the block has been compiled or not - bool is_compiled; - }; - - /// Block table type. Key is block address. - typedef std::unordered_map BlockTable; - RecompilationEngine(); RecompilationEngine(const RecompilationEngine & other) = delete; @@ -768,23 +801,26 @@ namespace ppu_recompiler_llvm { RecompilationEngine & operator = (const RecompilationEngine & other) = delete; RecompilationEngine & operator = (RecompilationEngine && other) = delete; - /// Process an execution trace. Returns an iterator to a block table entry if the block should be compiled. - BlockTable::iterator ProcessExecutionTrace(ExecutionTrace * execution_trace); + /// Process an execution trace. + void ProcessExecutionTrace(const ExecutionTrace & execution_trace); + + /// Update a CFG + void UpdateControlFlowGraph(ControlFlowGraph & cfg, BlockId block, BlockId next_block); /// Compile a block - void CompileBlock(BlockTable::iterator block_i); - - /// Build code fragment from a block - CodeFragment BuildCodeFragmentFromBlock(const BlockEntry & block_entry, bool force_inline); + void CompileBlock(const BlockEntry & block_entry, bool inline_referenced_blocks); /// Lock for accessing m_pending_execution_traces. TODO: Eliminate this and use a lock-free queue. std::mutex m_pending_execution_traces_lock; - /// Queue of execution traces pending prcessing + /// Queue of execution traces pending processing std::list m_pending_execution_traces; /// Block table - BlockTable m_block_table; + std::unordered_set m_block_table; + + /// Execution traces that have been already encountered. Data is the list of all blocks that this trace includes. 
+ std::unordered_map> m_processed_execution_traces; /// PPU Compiler Compiler m_compiler; @@ -793,7 +829,7 @@ namespace ppu_recompiler_llvm { static std::mutex s_mutex; /// The instance - static RecompilationEngine * s_the_instance; + static std::shared_ptr s_the_instance; }; /// Finds interesting execution sequences @@ -894,7 +930,7 @@ namespace ppu_recompiler_llvm { BranchType GetBranchTypeFromInstruction(u32 instruction); /// Get the execution trace id of an execution trace - ExecutionTraceId GetExecutionTraceId(const ExecutionTrace * execution_trace); + ExecutionTraceId GetExecutionTraceId(const ExecutionTrace & execution_trace); } #endif // PPU_LLVM_RECOMPILER_H diff --git a/rpcs3/stdafx.h b/rpcs3/stdafx.h index 7e67a2723a..88e422e2a3 100644 --- a/rpcs3/stdafx.h +++ b/rpcs3/stdafx.h @@ -33,6 +33,7 @@ #include #include #include +#include #include #include "Utilities/GNU.h" From ee6a239679eade4c1f4211c5489d31c30e0fade7 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Fri, 7 Nov 2014 16:24:59 +0530 Subject: [PATCH 05/27] Generate code from a CFG --- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 657 ++++++++++++++++----------- rpcs3/Emu/Cell/PPULLVMRecompiler.h | 192 +++++--- 2 files changed, 531 insertions(+), 318 deletions(-) diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 6adfdaed00..912d457a1b 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -5,7 +5,6 @@ #include "llvm/Support/TargetSelect.h" #include "llvm/Support/Host.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/CodeGen/MachineCodeInfo.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/IR/Intrinsics.h" @@ -19,6 +18,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Vectorize.h" #include "llvm/MC/MCDisassembler.h" +#include "llvm/IR/Verifier.h" using namespace llvm; using namespace ppu_recompiler_llvm; @@ -26,7 +26,10 @@ using namespace 
ppu_recompiler_llvm; u64 Compiler::s_rotate_mask[64][64]; bool Compiler::s_rotate_mask_inited = false; -Compiler::Compiler() { +Compiler::Compiler(RecompilationEngine & recompilation_engine, const Executable default_function_executable, const Executable default_block_executable) + : m_recompilation_engine(recompilation_engine) + , m_default_function_executable(default_function_executable) + , m_default_block_executable(default_block_executable) { InitializeNativeTarget(); InitializeNativeTargetAsmPrinter(); InitializeNativeTargetDisassembler(); @@ -63,6 +66,13 @@ Compiler::Compiler() { m_fpm->add(createCFGSimplificationPass()); m_fpm->doInitialization(); + std::vector arg_types; + arg_types.push_back(m_ir_builder->getInt64Ty()->getPointerTo()); + arg_types.push_back(m_ir_builder->getInt64Ty()->getPointerTo()); + arg_types.push_back(m_ir_builder->getInt64Ty()->getPointerTo()); + arg_types.push_back(m_ir_builder->getInt64Ty()->getPointerTo()); + m_compiled_function_type = FunctionType::get(m_ir_builder->getInt64Ty(), arg_types, false); + if (!s_rotate_mask_inited) { InitRotateMask(); s_rotate_mask_inited = true; @@ -76,124 +86,188 @@ Compiler::~Compiler() { delete m_llvm_context; } -//Executable Compiler::Compile(const std::string & name, const CodeFragment & code_fragment) { -// assert(!name.empty()); -// assert(!code_fragment.empty()); -// -// auto compilation_start = std::chrono::high_resolution_clock::now(); -// -// // Create the function -// m_current_function = (Function *)m_module->getOrInsertFunction(name, m_ir_builder->getVoidTy(), -// m_ir_builder->getInt8PtrTy() /*ppu_state*/, -// m_ir_builder->getInt8PtrTy() /*interpreter*/, -// m_ir_builder->getInt8PtrTy() /*tracer*/, nullptr); -// m_current_function->setCallingConv(CallingConv::X86_64_Win64); -// auto arg_i = m_current_function->arg_begin(); -// arg_i->setName("ppu_state"); -// (++arg_i)->setName("interpreter"); -// (++arg_i)->setName("tracer"); -// -// // Create the entry block -// 
GetBasicBlockFromAddress(0, m_current_function, true); -// -// // Create basic blocks for each instruction -// for (auto i = code_fragment.begin(); i != code_fragment.end(); i++) { -// u32 address = i->first.address; -// while (1) { -// GetBasicBlockFromAddress(address, m_current_function, true); -// -// u32 instr = vm::read32(address); -// if (IsBranchInstruction(instr)) { -// break; -// } -// -// address += 4; -// } -// } -// -// // Add code to notify the tracer about this function and branch to the first instruction -// m_ir_builder->SetInsertPoint(GetBasicBlockFromAddress(0, m_current_function)); -// //Call("Tracer.Trace", &Tracer::Trace, *arg_i, -// // m_ir_builder->getInt32(code_fragment[0].first.type == Function ? FunctionCall : Block), -// // m_ir_builder->getInt32(code_fragment[0].first.address)); -// m_ir_builder->CreateBr(GetBasicBlockFromAddress(code_fragment[0].first.address, m_current_function)); -// -// // Convert each block in this code fragment to LLVM IR -// for (auto i = code_fragment.begin(); i != code_fragment.end(); i++) { -// m_current_instruction_address = i->first.address; -// m_current_block_next_blocks = &(i->second); -// auto block = GetBasicBlockFromAddress(m_current_instruction_address, m_current_function); -// m_ir_builder->SetInsertPoint(block); -// -// if (i != code_fragment.begin() && i->first.type == BlockId::Type::FunctionCall) { -// auto ordinal = RecompilationEngine::GetInstance()->GetOrdinal(i->first.address); -// -// } -// -// m_hit_branch_instruction = false; -// while (!m_hit_branch_instruction) { -// if (!block->getInstList().empty()) { -// break; -// } -// -// u32 instr = vm::read32(m_current_instruction_address); -// Decode(instr); -// -// m_current_instruction_address += 4; -// if (!m_hit_branch_instruction) { -// block = GetBasicBlockFromAddress(m_current_instruction_address, m_current_function); -// m_ir_builder->CreateBr(block); -// m_ir_builder->SetInsertPoint(block); -// } -// } -// } -// -// // If the function has 
an unknown block then add code to notify the tracer -// auto unknown_bb = GetBasicBlockFromAddress(0xFFFFFFFF, m_current_function); -// if (!unknown_bb) { -// m_ir_builder->SetInsertPoint(unknown_bb); -// auto branch_type_i32 = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 1); -// for (auto i = pred_begin(unknown_bb); i != pred_end(unknown_bb); i++) { -// // We assume that the last but one instruction of the predecessor sets the branch type -// auto j = (*i)->rbegin(); -// j--; -// branch_type_i32->addIncoming(&(*j), *i); -// } -// -// //Call("NotifyBranch", &Tracer::NotifyBranch, *arg_i, -// // m_ir_builder->CreateZExtOrTrunc(branch_type_i32, m_ir_builder->getIntNTy(sizeof(Tracer::BranchType) * 8)), GetPc()); -// m_ir_builder->CreateRetVoid(); -// } -// -// auto ir_build_end = std::chrono::high_resolution_clock::now(); -// m_stats.ir_build_time += std::chrono::duration_cast(ir_build_end - compilation_start); -// -// // Optimize this function -// m_fpm->run(*m_current_function); -// auto optimize_end = std::chrono::high_resolution_clock::now(); -// m_stats.optimization_time += std::chrono::duration_cast(optimize_end - ir_build_end); -// -// // Translate to machine code -// MachineCodeInfo mci; -// m_execution_engine->runJITOnFunction(m_current_function, &mci); -// auto translate_end = std::chrono::high_resolution_clock::now(); -// m_stats.translation_time += std::chrono::duration_cast(translate_end - optimize_end); -// -// auto compilation_end = std::chrono::high_resolution_clock::now(); -// m_stats.total_time += std::chrono::duration_cast(compilation_end - compilation_start); -// -// //m_compiled[(CompiledCodeFragment)mci.address()] = m_current_function; -// //return (CompiledCodeFragment)mci.address(); -// return nullptr; -//} +Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & cfg, bool inline_all_blocks, bool generate_linkable_exits, bool generate_trace) { + assert(!name.empty()); + assert(!cfg.empty()); -//void 
Compiler::FreeCompiledCodeFragment(Executable compiled_code_fragment) { -// //auto i = m_compiled.find(compiled_code_fragment); -// //if (i != m_compiled.end()) { -// // m_execution_engine->freeMachineCodeForFunction(i->second); -// // i->second->eraseFromParent(); -// //} -//} + auto compilation_start = std::chrono::high_resolution_clock::now(); + + m_state.cfg = &cfg; + m_state.inline_all_blocks = inline_all_blocks; + m_state.generate_linkable_exits = generate_linkable_exits; + m_state.generate_trace = generate_trace; + m_state.address_to_block.clear(); + + // Create the function + m_state.function = (Function *)m_module->getOrInsertFunction(name, m_compiled_function_type); + m_state.function->setCallingConv(CallingConv::X86_64_Win64); + auto arg_i = m_state.function->arg_begin(); + arg_i->setName("execution_engine"); + m_state.args[CompileTaskState::Args::ExecutionEngine] = arg_i; + (++arg_i)->setName("state"); + m_state.args[CompileTaskState::Args::State] = arg_i; + (++arg_i)->setName("interpreter"); + m_state.args[CompileTaskState::Args::Interpreter] = arg_i; + (++arg_i)->setName("tracer"); + m_state.args[CompileTaskState::Args::Tracer] = arg_i; + + // Create the entry block and add code to branch to the first instruction + m_ir_builder->SetInsertPoint(GetBasicBlockFromAddress(0)); + m_ir_builder->CreateBr(GetBasicBlockFromAddress(cfg[0].first)); + + // Convert each block in this CFG to LLVM IR + for (m_state.cfg_entry = cfg.begin(); m_state.cfg_entry != cfg.end(); m_state.cfg_entry++) { + m_state.current_instruction_address = m_state.cfg_entry->first; + auto block = GetBasicBlockFromAddress(m_state.current_instruction_address); + m_ir_builder->SetInsertPoint(block); + + m_state.hit_branch_instruction = false; + if (!inline_all_blocks && m_state.cfg_entry != cfg.begin()) { + // Use an already compiled implementation of this block if available + auto ordinal = m_recompilation_engine.GetOrdinal(m_state.cfg_entry->first); + if (ordinal != 0xFFFFFFFF) { + auto 
ret_i64 = IndirectCall(m_state.cfg_entry->first, false); + auto switch_instr = m_ir_builder->CreateSwitch(ret_i64, GetBasicBlockFromAddress(0xFFFFFFFF)); + for (auto i = m_state.cfg_entry->second.begin(); i != m_state.cfg_entry->second.end(); i++) { + switch_instr->addCase(m_ir_builder->getInt64(i->address), GetBasicBlockFromAddress(i->address)); + } + + m_state.hit_branch_instruction = true; + } + } + + while (!m_state.hit_branch_instruction) { + if (!block->getInstList().empty()) { + break; + } + + u32 instr = re32(vm::get_ref(m_state.current_instruction_address)); + Decode(instr); + + if (!m_state.hit_branch_instruction) { + m_state.current_instruction_address += 4; + block = GetBasicBlockFromAddress(m_state.current_instruction_address); + m_ir_builder->CreateBr(block); + m_ir_builder->SetInsertPoint(block); + } + } + } + + m_recompilation_engine.Log() << *m_state.function; + + auto default_exit_block_name = GetBasicBlockNameFromAddress(0xFFFFFFFF); + for (auto block_i = m_state.function->begin(); block_i != m_state.function->end(); block_i++) { + if (!block_i->getInstList().empty() || block_i->getName() == default_exit_block_name) { + continue; + } + + // An empty block. Generate exit logic. 
+ m_recompilation_engine.Log() << "Empty block: " << block_i->getName() << "\n"; + + m_ir_builder->SetInsertPoint(block_i); + auto exit_block_i64 = m_ir_builder->CreatePHI(m_ir_builder->getInt64Ty(), 0); + for (auto i = pred_begin(block_i); i != pred_end(block_i); i++) { + auto pred_address = GetAddressFromBasicBlockName(block_i->getName()); + exit_block_i64->addIncoming(m_ir_builder->getInt64(m_state.address_to_block[pred_address]), *i); + } + + auto block_address = GetAddressFromBasicBlockName(block_i->getName()); + SetPc(m_ir_builder->getInt32(block_address)); + + if (generate_linkable_exits) { + if (generate_trace) { + Call("Tracer.Trace", &Tracer::Trace, m_ir_builder->getInt32((uint32_t)Tracer::TraceType::ExitFromCompiledFunction), + m_ir_builder->getInt32(cfg[0].first), m_ir_builder->CreateTrunc(exit_block_i64, m_ir_builder->getInt32Ty())); + } + + auto ret_i64 = IndirectCall(block_address, false); + auto cmp_i1 = m_ir_builder->CreateICmpNE(ret_i64, m_ir_builder->getInt64(0)); + auto then_bb = BasicBlock::Create(m_ir_builder->getContext()); + auto merge_bb = BasicBlock::Create(m_ir_builder->getContext()); + m_ir_builder->CreateCondBr(cmp_i1, then_bb, merge_bb); + + m_ir_builder->SetInsertPoint(then_bb); + IndirectCall(1, false); + m_ir_builder->CreateBr(merge_bb); + + m_ir_builder->SetInsertPoint(merge_bb); + m_ir_builder->CreateRet(m_ir_builder->getInt64(0)); + } else { + m_ir_builder->CreateRet(exit_block_i64); + } + } + + m_recompilation_engine.Log() << *m_state.function; + + // If the function has a default exit block then generate code for it + auto default_exit_bb = GetBasicBlockFromAddress(0xFFFFFFFF, false); + if (default_exit_bb) { + m_ir_builder->SetInsertPoint(default_exit_bb); + auto exit_block_i64 = m_ir_builder->CreatePHI(m_ir_builder->getInt64Ty(), 1); + for (auto i = pred_begin(default_exit_bb); i != pred_end(default_exit_bb); i++) { + // the last but one instruction of the predecessor sets the exit block address + auto j = (*i)->rbegin(); + 
j++; + exit_block_i64->addIncoming(&(*j), *i); + } + + if (generate_linkable_exits) { + auto cmp_i1 = m_ir_builder->CreateICmpNE(exit_block_i64, m_ir_builder->getInt64(0)); + auto then_bb = BasicBlock::Create(m_ir_builder->getContext()); + auto merge_bb = BasicBlock::Create(m_ir_builder->getContext()); + m_ir_builder->CreateCondBr(cmp_i1, then_bb, merge_bb); + + m_ir_builder->SetInsertPoint(then_bb); + if (generate_trace) { + Call("Tracer.Trace", &Tracer::Trace, m_ir_builder->getInt32((uint32_t)Tracer::TraceType::ExitFromCompiledFunction), + m_ir_builder->getInt32(cfg[0].first), m_ir_builder->CreateTrunc(exit_block_i64, m_ir_builder->getInt32Ty())); + } + + IndirectCall(1, false); + m_ir_builder->CreateBr(merge_bb); + + m_ir_builder->SetInsertPoint(merge_bb); + m_ir_builder->CreateRet(m_ir_builder->getInt64(0)); + } else { + m_ir_builder->CreateRet(exit_block_i64); + } + } + + m_recompilation_engine.Log() << *m_state.function; + + std::string verify; + raw_string_ostream verify_ostream(verify); + if (verifyFunction(*m_state.function, &verify_ostream)) { + m_recompilation_engine.Log() << "Verification failed: " << verify << "\n"; + } + + auto ir_build_end = std::chrono::high_resolution_clock::now(); + m_stats.ir_build_time += std::chrono::duration_cast(ir_build_end - compilation_start); + + // Optimize this function + //m_fpm->run(*m_state.function); + auto optimize_end = std::chrono::high_resolution_clock::now(); + m_stats.optimization_time += std::chrono::duration_cast(optimize_end - ir_build_end); + + // Translate to machine code + MachineCodeInfo mci; + m_execution_engine->runJITOnFunction(m_state.function, &mci); + auto translate_end = std::chrono::high_resolution_clock::now(); + m_stats.translation_time += std::chrono::duration_cast(translate_end - optimize_end); + + auto compilation_end = std::chrono::high_resolution_clock::now(); + m_stats.total_time += std::chrono::duration_cast(compilation_end - compilation_start); + + return (Executable)mci.address(); +} 
+ +void Compiler::FreeExecutable(const std::string & name) { + auto function = m_module->getFunction(name); + if (function) { + m_execution_engine->freeMachineCodeForFunction(function); + function->eraseFromParent(); + } +} Compiler::Stats Compiler::GetStats() { return m_stats; @@ -1500,7 +1574,7 @@ void Compiler::ADDIS(u32 rd, u32 ra, s32 simm16) { } void Compiler::BC(u32 bo, u32 bi, s32 bd, u32 aa, u32 lk) { - auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_current_instruction_address, bd)); + auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_state.current_instruction_address, bd)); CreateBranch(CheckBranchCondition(bo, bi), target_i64, lk ? true : false); //m_hit_branch_instruction = true; //SetPc(m_ir_builder->getInt32(m_current_instruction_address)); @@ -1514,7 +1588,7 @@ void Compiler::SC(u32 sc_code) { } void Compiler::B(s32 ll, u32 aa, u32 lk) { - auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_current_instruction_address, ll)); + auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_state.current_instruction_address, ll)); CreateBranch(nullptr, target_i64, lk ? 
true : false); //m_hit_branch_instruction = true; //SetPc(m_ir_builder->getInt32(m_current_instruction_address)); @@ -2009,13 +2083,13 @@ void Compiler::LWARX(u32 rd, u32 ra, u32 rb) { addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); } - auto resv_addr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, R_ADDR)); + auto resv_addr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, R_ADDR)); auto resv_addr_i64_ptr = m_ir_builder->CreateBitCast(resv_addr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(addr_i64, resv_addr_i64_ptr, 8); auto resv_val_i32 = ReadMemory(addr_i64, 32, 4, false, false); auto resv_val_i64 = m_ir_builder->CreateZExt(resv_val_i32, m_ir_builder->getInt64Ty()); - auto resv_val_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, R_VALUE)); + auto resv_val_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, R_VALUE)); auto resv_val_i64_ptr = m_ir_builder->CreateBitCast(resv_val_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(resv_val_i64, resv_val_i64_ptr, 8); @@ -2208,6 +2282,7 @@ void Compiler::LDUX(u32 rd, u32 ra, u32 rb) { void Compiler::DCBST(u32 ra, u32 rb) { // TODO: Implement this + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); //InterpreterCall("DCBST", &PPUInterpreter::DCBST, ra, rb); } @@ -2301,12 +2376,12 @@ void Compiler::LDARX(u32 rd, u32 ra, u32 rb) { addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); } - auto resv_addr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, R_ADDR)); + auto resv_addr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, R_ADDR)); auto resv_addr_i64_ptr = 
m_ir_builder->CreateBitCast(resv_addr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(addr_i64, resv_addr_i64_ptr, 8); auto resv_val_i64 = ReadMemory(addr_i64, 64, 8, false); - auto resv_val_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, R_VALUE)); + auto resv_val_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, R_VALUE)); auto resv_val_i64_ptr = m_ir_builder->CreateBitCast(resv_val_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(resv_val_i64, resv_val_i64_ptr, 8); @@ -2317,6 +2392,7 @@ void Compiler::LDARX(u32 rd, u32 ra, u32 rb) { void Compiler::DCBF(u32 ra, u32 rb) { // TODO: Implement this + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); //InterpreterCall("DCBF", &PPUInterpreter::DCBF, ra, rb); } @@ -2596,6 +2672,7 @@ void Compiler::MULLW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { void Compiler::DCBTST(u32 ra, u32 rb, u32 th) { // TODO: Implement this + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); //InterpreterCall("DCBTST", &PPUInterpreter::DCBTST, ra, rb, th); } @@ -2627,6 +2704,7 @@ void Compiler::ADD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { void Compiler::DCBT(u32 ra, u32 rb, u32 th) { // TODO: Implement this using prefetch + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); //InterpreterCall("DCBT", &PPUInterpreter::DCBT, ra, rb, th); } @@ -4070,24 +4148,40 @@ void Compiler::UNK(const u32 code, const u32 opcode, const u32 gcode) { //InterpreterCall("UNK", &PPUInterpreter::UNK, code, opcode, gcode); } -std::string Compiler::GetBasicBlockNameFromAddress(u32 address) { +std::string Compiler::GetBasicBlockNameFromAddress(u32 address, const std::string & suffix) { std::string name; if (address == 0) { name = "entry"; } else if (address == 0xFFFFFFFF) { - 
name = "unknown"; + name = "default_exit"; } else { - name = fmt::Format("instr_0x%X", address); + name = fmt::Format("instr_0x%08X", address); + } + + if (suffix != "") { + name += "_" + suffix; } return name; } -BasicBlock * Compiler::GetBasicBlockFromAddress(u32 address, Function * function, bool create_if_not_exist) { - auto block_name = GetBasicBlockNameFromAddress(address); +u32 Compiler::GetAddressFromBasicBlockName(const std::string & name) { + if (name.compare(0, 6, "instr_") == 0) { + return strtoul(name.c_str() + 6, nullptr, 0); + } else if (name == GetBasicBlockNameFromAddress(0)) { + return 0; + } else if (name == GetBasicBlockNameFromAddress(0xFFFFFFFF)) { + return 0xFFFFFFFF; + } + + return 0; +} + +BasicBlock * Compiler::GetBasicBlockFromAddress(u32 address, const std::string & suffix, bool create_if_not_exist) { + auto block_name = GetBasicBlockNameFromAddress(address, suffix); BasicBlock * block = nullptr; - for (auto i = function->getBasicBlockList().begin(); i != function->getBasicBlockList().end(); i++) { + for (auto i = m_state.function->getBasicBlockList().begin(); i != m_state.function->getBasicBlockList().end(); i++) { if (i->getName() == block_name) { block = &(*i); break; @@ -4095,29 +4189,12 @@ BasicBlock * Compiler::GetBasicBlockFromAddress(u32 address, Function * function } if (!block && create_if_not_exist) { - block = BasicBlock::Create(m_ir_builder->getContext(), block_name, function); + block = BasicBlock::Create(m_ir_builder->getContext(), block_name, m_state.function); } return block; } -Value * Compiler::GetPPUStateArg() { - return m_current_function->arg_begin(); -} - -Value * Compiler::GetInterpreterArg() { - auto i = m_current_function->arg_begin(); - i++; - return i; -} - -Value * Compiler::GetTracerArg() { - auto i = m_current_function->arg_begin(); - i++; - i++; - return i; -} - Value * Compiler::GetBit(Value * val, u32 n) { Value * bit; @@ -4231,33 +4308,33 @@ Value * Compiler::SetNibble(Value * val, u32 n, Value * b0, 
Value * b1, Value * } Value * Compiler::GetPc() { - auto pc_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, PC)); + auto pc_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, PC)); auto pc_i32_ptr = m_ir_builder->CreateBitCast(pc_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); return m_ir_builder->CreateAlignedLoad(pc_i32_ptr, 4); } void Compiler::SetPc(Value * val_ix) { - auto pc_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, PC)); + auto pc_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, PC)); auto pc_i32_ptr = m_ir_builder->CreateBitCast(pc_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); auto val_i32 = m_ir_builder->CreateZExtOrTrunc(val_ix, m_ir_builder->getInt32Ty()); m_ir_builder->CreateAlignedStore(val_i32, pc_i32_ptr, 4); } Value * Compiler::GetGpr(u32 r, u32 num_bits) { - auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, GPR[r])); + auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, GPR[r])); auto r_ix_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getIntNTy(num_bits)->getPointerTo()); return m_ir_builder->CreateAlignedLoad(r_ix_ptr, 8); } void Compiler::SetGpr(u32 r, Value * val_x64) { - auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, GPR[r])); + auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, GPR[r])); auto r_i64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); m_ir_builder->CreateAlignedStore(val_i64, r_i64_ptr, 8); } Value * 
Compiler::GetCr() { - auto cr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, CR)); + auto cr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CR)); auto cr_i32_ptr = m_ir_builder->CreateBitCast(cr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); return m_ir_builder->CreateAlignedLoad(cr_i32_ptr, 4); } @@ -4268,7 +4345,7 @@ Value * Compiler::GetCrField(u32 n) { void Compiler::SetCr(Value * val_x32) { auto val_i32 = m_ir_builder->CreateBitCast(val_x32, m_ir_builder->getInt32Ty()); - auto cr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, CR)); + auto cr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CR)); auto cr_i32_ptr = m_ir_builder->CreateBitCast(cr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(val_i32, cr_i32_ptr, 4); } @@ -4310,33 +4387,33 @@ void Compiler::SetCr6AfterVectorCompare(u32 vr) { } Value * Compiler::GetLr() { - auto lr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, LR)); + auto lr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, LR)); auto lr_i64_ptr = m_ir_builder->CreateBitCast(lr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); return m_ir_builder->CreateAlignedLoad(lr_i64_ptr, 8); } void Compiler::SetLr(Value * val_x64) { auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - auto lr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, LR)); + auto lr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, LR)); auto lr_i64_ptr = m_ir_builder->CreateBitCast(lr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); 
m_ir_builder->CreateAlignedStore(val_i64, lr_i64_ptr, 8); } Value * Compiler::GetCtr() { - auto ctr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, CTR)); + auto ctr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CTR)); auto ctr_i64_ptr = m_ir_builder->CreateBitCast(ctr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); return m_ir_builder->CreateAlignedLoad(ctr_i64_ptr, 8); } void Compiler::SetCtr(Value * val_x64) { auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - auto ctr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, CTR)); + auto ctr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CTR)); auto ctr_i64_ptr = m_ir_builder->CreateBitCast(ctr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(val_i64, ctr_i64_ptr, 8); } Value * Compiler::GetXer() { - auto xer_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, XER)); + auto xer_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, XER)); auto xer_i64_ptr = m_ir_builder->CreateBitCast(xer_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); return m_ir_builder->CreateAlignedLoad(xer_i64_ptr, 8); } @@ -4351,7 +4428,7 @@ Value * Compiler::GetXerSo() { void Compiler::SetXer(Value * val_x64) { auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - auto xer_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, XER)); + auto xer_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, XER)); auto xer_i64_ptr = m_ir_builder->CreateBitCast(xer_i8_ptr, 
m_ir_builder->getInt64Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(val_i64, xer_i64_ptr, 8); } @@ -4369,20 +4446,20 @@ void Compiler::SetXerSo(Value * so) { } Value * Compiler::GetUsprg0() { - auto usrpg0_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, USPRG0)); + auto usrpg0_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, USPRG0)); auto usprg0_i64_ptr = m_ir_builder->CreateBitCast(usrpg0_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); return m_ir_builder->CreateAlignedLoad(usprg0_i64_ptr, 8); } void Compiler::SetUsprg0(Value * val_x64) { auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - auto usprg0_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, USPRG0)); + auto usprg0_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, USPRG0)); auto usprg0_i64_ptr = m_ir_builder->CreateBitCast(usprg0_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(val_i64, usprg0_i64_ptr, 8); } Value * Compiler::GetFpr(u32 r, u32 bits, bool as_int) { - auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, FPR[r])); + auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPR[r])); if (!as_int) { auto r_f64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getDoubleTy()->getPointerTo()); auto r_f64 = m_ir_builder->CreateAlignedLoad(r_f64_ptr, 8); @@ -4403,7 +4480,7 @@ Value * Compiler::GetFpr(u32 r, u32 bits, bool as_int) { } void Compiler::SetFpr(u32 r, Value * val) { - auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, FPR[r])); + auto r_i8_ptr = 
m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPR[r])); auto r_f64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getDoubleTy()->getPointerTo()); Value* val_f64; @@ -4420,47 +4497,47 @@ void Compiler::SetFpr(u32 r, Value * val) { } Value * Compiler::GetVscr() { - auto vscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, VSCR)); + auto vscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VSCR)); auto vscr_i32_ptr = m_ir_builder->CreateBitCast(vscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); return m_ir_builder->CreateAlignedLoad(vscr_i32_ptr, 4); } void Compiler::SetVscr(Value * val_x32) { auto val_i32 = m_ir_builder->CreateBitCast(val_x32, m_ir_builder->getInt32Ty()); - auto vscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, VSCR)); + auto vscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VSCR)); auto vscr_i32_ptr = m_ir_builder->CreateBitCast(vscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(val_i32, vscr_i32_ptr, 4); } Value * Compiler::GetVr(u32 vr) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, VPR[vr])); + auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); return m_ir_builder->CreateAlignedLoad(vr_i128_ptr, 16); } Value * Compiler::GetVrAsIntVec(u32 vr, u32 vec_elt_num_bits) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, VPR[vr])); + auto vr_i8_ptr = 
m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); auto vr_vec_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getIntNTy(vec_elt_num_bits), 128 / vec_elt_num_bits)->getPointerTo()); return m_ir_builder->CreateAlignedLoad(vr_vec_ptr, 16); } Value * Compiler::GetVrAsFloatVec(u32 vr) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, VPR[vr])); + auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); auto vr_v4f32_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getFloatTy(), 4)->getPointerTo()); return m_ir_builder->CreateAlignedLoad(vr_v4f32_ptr, 16); } Value * Compiler::GetVrAsDoubleVec(u32 vr) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, VPR[vr])); + auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); auto vr_v2f64_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getDoubleTy(), 2)->getPointerTo()); return m_ir_builder->CreateAlignedLoad(vr_v2f64_ptr, 16); } void Compiler::SetVr(u32 vr, Value * val_x128) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUStateArg(), (unsigned int)offsetof(PPUThread, VPR[vr])); + auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, 
m_ir_builder->getIntNTy(128)->getPointerTo()); auto val_i128 = m_ir_builder->CreateBitCast(val_x128, m_ir_builder->getIntNTy(128)); m_ir_builder->CreateAlignedStore(val_i128, vr_i128_ptr, 16); @@ -4510,7 +4587,7 @@ Value * Compiler::CheckBranchCondition(u32 bo, u32 bi) { void Compiler::CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i64, bool lk, bool target_is_lr) { if (lk) { - SetLr(m_ir_builder->getInt64(m_current_instruction_address + 4)); + SetLr(m_ir_builder->getInt64(m_state.current_instruction_address + 4)); } auto current_block = m_ir_builder->GetInsertBlock(); @@ -4519,49 +4596,69 @@ void Compiler::CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i64, bool if (dyn_cast(target_i64)) { // Target address is an immediate value. u32 target_address = (u32)(dyn_cast(target_i64)->getLimitedValue()); - target_block = GetBasicBlockFromAddress(target_address, m_current_function); - if (!target_block) { - target_block = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function); - m_ir_builder->SetInsertPoint(target_block); + if (lk) { + // Function call + if (cmp_i1) { // There is no need to create a new block for an unconditional jump + target_block = BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function); + m_ir_builder->SetInsertPoint(target_block); + } + SetPc(target_i64); - m_ir_builder->CreateBr(GetBasicBlockFromAddress(0xFFFFFFFF, m_current_function, true)); + IndirectCall(target_address, true); + m_ir_builder->CreateBr(GetBasicBlockFromAddress(m_state.current_instruction_address + 4)); + } else { + // Local branch + target_block = GetBasicBlockFromAddress(target_address); } } else { // Target address is in a register - target_block = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function); - m_ir_builder->SetInsertPoint(target_block); + if (cmp_i1) { // There is no need to create a new block for an unconditional jump + target_block = BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function); 
+ m_ir_builder->SetInsertPoint(target_block); + } + SetPc(target_i64); if (target_is_lr && !lk) { // Return from function call - m_ir_builder->CreateRetVoid(); + m_ir_builder->CreateRet(m_ir_builder->getInt64(0)); + } else if (lk) { + auto next_block = GetBasicBlockFromAddress(m_state.current_instruction_address + 4); + auto call_block = BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function); + m_ir_builder->CreateOr(m_ir_builder->getInt64(m_state.cfg_entry->first), (uint64_t)0); + auto switch_instr = m_ir_builder->CreateSwitch(target_i64, call_block); + m_ir_builder->SetInsertPoint(call_block); + IndirectCall(0, true); + m_ir_builder->CreateBr(next_block); + for (auto i = m_state.cfg_entry->second.begin(); i != m_state.cfg_entry->second.end(); i++) { + call_block = BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function); + m_ir_builder->SetInsertPoint(call_block); + IndirectCall(i->address, true); + m_ir_builder->CreateBr(next_block); + switch_instr->addCase(m_ir_builder->getInt32(i->address), call_block); + } } else { - auto switch_instr = m_ir_builder->CreateSwitch(target_i64, GetBasicBlockFromAddress(0xFFFFFFFF, m_current_function, true)); - for (auto i = m_current_block_next_blocks->begin(); i != m_current_block_next_blocks->end(); i++) { - switch_instr->addCase(m_ir_builder->getInt32(i->address), GetBasicBlockFromAddress(i->address, m_current_function)); + auto switch_instr = m_ir_builder->CreateSwitch(target_i64, GetBasicBlockFromAddress(0xFFFFFFFF)); + for (auto i = m_state.cfg_entry->second.begin(); i != m_state.cfg_entry->second.end(); i++) { + switch_instr->addCase(m_ir_builder->getInt64(i->address), GetBasicBlockFromAddress(i->address)); } } } if (cmp_i1) { // Conditional branch - auto next_block = GetBasicBlockFromAddress(m_current_instruction_address + 4, m_current_function); - if (!next_block) { - next_block = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function); - m_ir_builder->SetInsertPoint(next_block); 
- SetPc(m_ir_builder->getInt32(m_current_instruction_address + 4)); - m_ir_builder->CreateBr(GetBasicBlockFromAddress(0xFFFFFFFF, m_current_function, true)); - } - + auto next_block = GetBasicBlockFromAddress(m_state.current_instruction_address + 4); m_ir_builder->SetInsertPoint(current_block); m_ir_builder->CreateCondBr(cmp_i1, target_block, next_block); } else { // Unconditional branch - m_ir_builder->SetInsertPoint(current_block); - m_ir_builder->CreateBr(target_block); + if (target_block) { + m_ir_builder->SetInsertPoint(current_block); + m_ir_builder->CreateBr(target_block); + } } - m_hit_branch_instruction = true; + m_state.hit_branch_instruction = true; } Value * Compiler::ReadMemory(Value * addr_i64, u32 bits, u32 alignment, bool bswap, bool could_be_mmio) { @@ -4576,10 +4673,10 @@ Value * Compiler::ReadMemory(Value * addr_i64, u32 bits, u32 alignment, bool bsw return val_ix; } else { BasicBlock * next_block = nullptr; - for (auto i = m_current_function->begin(); i != m_current_function->end(); i++) { + for (auto i = m_state.function->begin(); i != m_state.function->end(); i++) { if (&(*i) == m_ir_builder->GetInsertBlock()) { i++; - if (i != m_current_function->end()) { + if (i != m_state.function->end()) { next_block = &(*i); } @@ -4588,9 +4685,9 @@ Value * Compiler::ReadMemory(Value * addr_i64, u32 bits, u32 alignment, bool bsw } auto cmp_i1 = m_ir_builder->CreateICmpULT(addr_i64, m_ir_builder->getInt64(RAW_SPU_BASE_ADDR)); - auto then_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function, next_block); - auto else_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function, next_block); - auto merge_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function, next_block); + auto then_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function, next_block); + auto else_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function, next_block); + auto merge_bb = 
BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function, next_block); m_ir_builder->CreateCondBr(cmp_i1, then_bb, else_bb); m_ir_builder->SetInsertPoint(then_bb); @@ -4630,10 +4727,10 @@ void Compiler::WriteMemory(Value * addr_i64, Value * val_ix, u32 alignment, bool m_ir_builder->CreateAlignedStore(val_ix, eaddr_ix_ptr, alignment); } else { BasicBlock * next_block = nullptr; - for (auto i = m_current_function->begin(); i != m_current_function->end(); i++) { + for (auto i = m_state.function->begin(); i != m_state.function->end(); i++) { if (&(*i) == m_ir_builder->GetInsertBlock()) { i++; - if (i != m_current_function->end()) { + if (i != m_state.function->end()) { next_block = &(*i); } @@ -4642,9 +4739,9 @@ void Compiler::WriteMemory(Value * addr_i64, Value * val_ix, u32 alignment, bool } auto cmp_i1 = m_ir_builder->CreateICmpULT(addr_i64, m_ir_builder->getInt64(RAW_SPU_BASE_ADDR)); - auto then_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function, next_block); - auto else_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function, next_block); - auto merge_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function, next_block); + auto then_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function, next_block); + auto else_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function, next_block); + auto merge_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function, next_block); m_ir_builder->CreateCondBr(cmp_i1, then_bb, else_bb); m_ir_builder->SetInsertPoint(then_bb); @@ -4680,7 +4777,7 @@ Value * Compiler::InterpreterCall(const char * name, Func function, Args... 
args i->second++; - return Call(name, function, GetInterpreterArg(), m_ir_builder->getInt32(args)...); + return Call(name, function, m_state.args[CompileTaskState::Args::Interpreter], m_ir_builder->getInt32(args)...); } template @@ -4723,19 +4820,14 @@ Value * Compiler::Call(const char * name, Func function, Args... args) { return m_ir_builder->CreateCall(fn, fn_args); } -bool Compiler::IsBranchInstruction(u32 instruction) { - bool is_branch = false; - u32 field1 = instruction >> 26; - if (field1 == 16 || field1 == 18) { - is_branch = true; - } else if (field1 == 19) { - u32 field2 = (instruction >> 1) & 0x3FF; - if (field2 == 16 || field2 == 528) { - is_branch = true; - } - } - - return is_branch; +llvm::Value * Compiler::IndirectCall(u32 address, bool is_function) { + auto ordinal = m_recompilation_engine.AllocateOrdinal(address, is_function); + auto executable_addr_i64 = m_ir_builder->getInt64(m_recompilation_engine.GetAddressOfExecutableLookup() + (ordinal * sizeof(u64))); + auto executable_ptr = m_ir_builder->CreateIntToPtr(executable_addr_i64, m_compiled_function_type); + return m_ir_builder->CreateCall4(executable_ptr, m_state.args[CompileTaskState::Args::ExecutionEngine], + m_state.args[CompileTaskState::Args::State], + m_state.args[CompileTaskState::Args::Interpreter], + m_state.args[CompileTaskState::Args::Tracer]); } void Compiler::InitRotateMask() { @@ -4751,20 +4843,50 @@ std::mutex RecompilationEngine::s_mutex; std::shared_ptr RecompilationEngine::s_the_instance = nullptr; RecompilationEngine::RecompilationEngine() - : ThreadBase("PPU Recompilation Engine") { - Start(); + : ThreadBase("PPU Recompilation Engine") + , m_next_ordinal(0) + , m_compiler(*this, ExecutionEngine::ExecuteFunction, ExecutionEngine::ExecuteTillReturn) + , m_log("PPULLVMRecompiler.log", std::string(), sys::fs::F_Text) { + m_log.SetUnbuffered(); } RecompilationEngine::~RecompilationEngine() { Stop(); } -u32 RecompilationEngine::GetOrdinal(u32 address) { - return 0xFFFFFFFF; +u32 
RecompilationEngine::AllocateOrdinal(u32 address, bool is_function) { + std::lock_guard lock(m_address_to_ordinal_lock); + + auto i = m_address_to_ordinal.find(address); + if (i == m_address_to_ordinal.end()) { + assert(m_next_ordinal < (sizeof(m_executable_lookup) / sizeof(m_executable_lookup[0]))); + + m_executable_lookup[m_next_ordinal] = is_function ? ExecutionEngine::ExecuteFunction : ExecutionEngine::ExecuteTillReturn; + std::atomic_thread_fence(std::memory_order_release); + i = m_address_to_ordinal.insert(m_address_to_ordinal.end(), std::make_pair(address, m_next_ordinal++)); + } + + return i->second; } -Executable * RecompilationEngine::GetExecutableLookup() const { - return nullptr; +u32 RecompilationEngine::GetOrdinal(u32 address) const { + std::lock_guard lock(m_address_to_ordinal_lock); + + auto i = m_address_to_ordinal.find(address); + if (i != m_address_to_ordinal.end()) { + return i->second; + } else { + return 0xFFFFFFFF; + } +} + +const Executable RecompilationEngine::GetExecutable(u32 ordinal) const { + std::atomic_thread_fence(std::memory_order_acquire); + return m_executable_lookup[ordinal]; +} + +u64 RecompilationEngine::GetAddressOfExecutableLookup() const { + return (u64)m_executable_lookup; } void RecompilationEngine::NotifyTrace(ExecutionTrace * execution_trace) { @@ -4773,10 +4895,18 @@ void RecompilationEngine::NotifyTrace(ExecutionTrace * execution_trace) { m_pending_execution_traces.push_back(execution_trace); } + if (!IsAlive()) { + Start(); + } + Notify(); // TODO: Increase the priority of the recompilation engine thread } +raw_fd_ostream & RecompilationEngine::Log() { + return m_log; +} + void RecompilationEngine::Task() { std::chrono::nanoseconds idling_time(0); @@ -4805,6 +4935,7 @@ void RecompilationEngine::Task() { } ProcessExecutionTrace(*execution_trace); + delete execution_trace; } // TODO: Reduce the priority of the recompilation engine thread @@ -4819,18 +4950,16 @@ void RecompilationEngine::Task() { auto total_time = 
std::chrono::duration_cast(end - start); auto compiler_stats = m_compiler.GetStats(); - std::string error; - raw_fd_ostream log_file("PPULLVMRecompiler.log", error, sys::fs::F_Text); - log_file << "Total time = " << total_time.count() / 1000000 << "ms\n"; - log_file << " Time spent compiling = " << compiler_stats.total_time.count() / 1000000 << "ms\n"; - log_file << " Time spent building IR = " << compiler_stats.ir_build_time.count() / 1000000 << "ms\n"; - log_file << " Time spent optimizing = " << compiler_stats.optimization_time.count() / 1000000 << "ms\n"; - log_file << " Time spent translating = " << compiler_stats.translation_time.count() / 1000000 << "ms\n"; - log_file << " Time spent idling = " << idling_time.count() / 1000000 << "ms\n"; - log_file << " Time spent doing misc tasks = " << (total_time.count() - idling_time.count() - compiler_stats.total_time.count()) / 1000000 << "ms\n"; - log_file << "\nInterpreter fallback stats:\n"; + Log() << "Total time = " << total_time.count() / 1000000 << "ms\n"; + Log() << " Time spent compiling = " << compiler_stats.total_time.count() / 1000000 << "ms\n"; + Log() << " Time spent building IR = " << compiler_stats.ir_build_time.count() / 1000000 << "ms\n"; + Log() << " Time spent optimizing = " << compiler_stats.optimization_time.count() / 1000000 << "ms\n"; + Log() << " Time spent translating = " << compiler_stats.translation_time.count() / 1000000 << "ms\n"; + Log() << " Time spent idling = " << idling_time.count() / 1000000 << "ms\n"; + Log() << " Time spent doing misc tasks = " << (total_time.count() - idling_time.count() - compiler_stats.total_time.count()) / 1000000 << "ms\n"; + Log() << "\nInterpreter fallback stats:\n"; for (auto i = compiler_stats.interpreter_fallback_stats.begin(); i != compiler_stats.interpreter_fallback_stats.end(); i++) { - log_file << i->first << " = " << i->second << "\n"; + Log() << i->first << " = " << i->second << "\n"; } //log_file << "\nDisassembly:\n"; @@ -4860,6 +4989,8 @@ void 
RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution auto execution_trace_id = GetExecutionTraceId(execution_trace); auto processed_execution_trace_i = m_processed_execution_traces.find(execution_trace_id); if (processed_execution_trace_i == m_processed_execution_traces.end()) { + Log() << "Trace: " << execution_trace.ToString() << "\n"; + std::vector tmp_block_list; auto split_trace = false; @@ -4936,19 +5067,15 @@ void RecompilationEngine::UpdateControlFlowGraph(ControlFlowGraph & cfg, BlockId } } -void RecompilationEngine::CompileBlock(const BlockEntry & block_entry, bool inline_referenced_blocks) { - std::string cfg_str; - for (auto i = block_entry.cfg.begin(); i != block_entry.cfg.end(); i++) { - cfg_str += fmt::Format("0x%08X ->", i->first); - for (auto j = i->second.begin(); j != i->second.end(); j++) { - cfg_str += " " + j->ToString(); - } +void RecompilationEngine::CompileBlock(BlockEntry & block_entry, bool inline_referenced_blocks) { + Log() << "Compile: " << block_entry.ToString() << "\n"; - if (i != (block_entry.cfg.end() - 1)) { - cfg_str += "\n"; - } - } - LOG_NOTICE(PPU, "Compile: %c:0x%08X, NumHits=%u\n%s", block_entry.is_function_start ? 'F' : 'N', block_entry.address, block_entry.num_hits, cfg_str.c_str()); + auto ordinal = AllocateOrdinal(block_entry.address, block_entry.is_function_start); + auto executable = m_compiler.Compile(fmt::Format("fn_0x%08X_%u", block_entry.address, block_entry.revision++), block_entry.cfg, + block_entry.is_function_start ? false : true /*inline_all_blocks*/, + block_entry.is_function_start ? true : false /*generate_linkable_exits*/, + block_entry.is_function_start ? true : false /*generate_trace*/); + m_executable_lookup[ordinal] = executable; } std::shared_ptr RecompilationEngine::GetInstance() { @@ -4984,11 +5111,8 @@ void Tracer::Trace(TraceType trace_type, u32 arg1, u32 arg2) { m_trace.push_back(block_id); break; case TraceType::EnterFunction: - // arg1 is address. 
- block_id.address = arg1; - block_id.type = BlockId::Type::Normal; + // No args used m_stack.push_back((u32)m_trace.size()); - m_trace.push_back(block_id); break; case TraceType::ExitFromCompiledFunction: // arg1 is address of function. @@ -5059,7 +5183,6 @@ void Tracer::Trace(TraceType trace_type, u32 arg1, u32 arg2) { } if (execution_trace) { - LOG_NOTICE(PPU, "Trace: %s", execution_trace->ToString().c_str()); m_recompilation_engine->NotifyTrace(execution_trace); } } @@ -5074,7 +5197,6 @@ ppu_recompiler_llvm::ExecutionEngine::ExecutionEngine(PPUThread & ppu) , m_decoder(m_interpreter) , m_last_cache_clear_time(std::chrono::high_resolution_clock::now()) , m_recompilation_engine(RecompilationEngine::GetInstance()) { - m_executable_lookup = m_recompilation_engine->GetExecutableLookup(); } ppu_recompiler_llvm::ExecutionEngine::~ExecutionEngine() { @@ -5086,7 +5208,7 @@ u8 ppu_recompiler_llvm::ExecutionEngine::DecodeMemory(const u32 address) { return 0; } -void ppu_recompiler_llvm::ExecutionEngine::RemoveUnusedEntriesFromCache() { +void ppu_recompiler_llvm::ExecutionEngine::RemoveUnusedEntriesFromCache() const { auto now = std::chrono::high_resolution_clock::now(); if (std::chrono::duration_cast(now - m_last_cache_clear_time).count() > 10000) { for (auto i = m_address_to_ordinal.begin(); i != m_address_to_ordinal.end();) { @@ -5103,7 +5225,7 @@ void ppu_recompiler_llvm::ExecutionEngine::RemoveUnusedEntriesFromCache() { } } -Executable ppu_recompiler_llvm::ExecutionEngine::GetExecutable(u32 address, Executable default_executable) { +Executable ppu_recompiler_llvm::ExecutionEngine::GetExecutable(u32 address, Executable default_executable) const { // Find the ordinal for the specified address and insert it to the cache auto i = m_address_to_ordinal.find(address); if (i == m_address_to_ordinal.end()) { @@ -5116,7 +5238,7 @@ Executable ppu_recompiler_llvm::ExecutionEngine::GetExecutable(u32 address, Exec Executable executable = default_executable; if (i != 
m_address_to_ordinal.end()) { i->second.second++; - executable = m_executable_lookup[i->second.first]; + executable = m_recompilation_engine->GetExecutable(i->second.first); } RemoveUnusedEntriesFromCache(); @@ -5124,13 +5246,13 @@ Executable ppu_recompiler_llvm::ExecutionEngine::GetExecutable(u32 address, Exec } u64 ppu_recompiler_llvm::ExecutionEngine::ExecuteFunction(ExecutionEngine * execution_engine, PPUThread * ppu_state, PPUInterpreter * interpreter, Tracer * tracer) { - tracer->Trace(Tracer::TraceType::EnterFunction, ppu_state->PC, 0); + tracer->Trace(Tracer::TraceType::EnterFunction, 0, 0); return ExecuteTillReturn(execution_engine, ppu_state, interpreter, tracer); } u64 ppu_recompiler_llvm::ExecutionEngine::ExecuteTillReturn(ExecutionEngine * execution_engine, PPUThread * ppu_state, PPUInterpreter * interpreter, Tracer * tracer) { bool terminate = false; - bool returned = false; + bool returned = true; while (!terminate && !Emu.IsStopped()) { if (Emu.IsPaused()) { @@ -5183,6 +5305,23 @@ u64 ppu_recompiler_llvm::ExecutionEngine::ExecuteTillReturn(ExecutionEngine * ex return 0; } +std::string ppu_recompiler_llvm::ControlFlowGraphToString(const ControlFlowGraph & cfg) { + std::string s; + + for (auto i = cfg.begin(); i != cfg.end(); i++) { + s += fmt::Format("0x%08X ->", i->first); + for (auto j = i->second.begin(); j != i->second.end(); j++) { + s += " " + j->ToString(); + } + + if (i != (cfg.end() - 1)) { + s += "\n"; + } + } + + return s; +} + BranchType ppu_recompiler_llvm::GetBranchTypeFromInstruction(u32 instruction) { auto type = BranchType::NonBranch; auto field1 = instruction >> 26; diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h index 5f27cd540e..856ecddc72 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.h +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h @@ -4,6 +4,7 @@ #include "Emu/Cell/PPUDecoder.h" #include "Emu/Cell/PPUThread.h" #include "Emu/Cell/PPUInterpreter.h" +#include "llvm/Support/raw_ostream.h" #include 
"llvm/IR/LLVMContext.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" @@ -49,6 +50,9 @@ namespace ppu_recompiler_llvm { /// Control flow graph of a block. A list of (block address, list of next blocks) pairs. typedef std::vector>> ControlFlowGraph; + /// Get a string representation of a ControlFlowGraph + std::string ControlFlowGraphToString(const ControlFlowGraph & cfg); + /// Uniquely identifies an execution trace typedef u64 ExecutionTraceId; @@ -87,6 +91,9 @@ namespace ppu_recompiler_llvm { /// Number of times this block was hit u32 num_hits; + /// The current revision number of this function + u32 revision; + /// The CFG for this block ControlFlowGraph cfg; @@ -99,9 +106,15 @@ namespace ppu_recompiler_llvm { BlockEntry(u32 addr) : address(addr) , num_hits(0) + , revision(0) , is_compiled(false) { } + std::string ToString() const { + return fmt::Format("%c:0x%08X, NumHits=%u, IsCompiled=%c\n%s", is_function_start ? 'F' : 'N', address, num_hits, + is_compiled ? 'Y' : 'N', ControlFlowGraphToString(cfg).c_str()); + } + bool operator == (const BlockEntry & other) const { return address == other.address; } @@ -137,7 +150,7 @@ namespace ppu_recompiler_llvm { std::map interpreter_fallback_stats; }; - Compiler(); + Compiler(RecompilationEngine & recompilation_engine, const Executable default_function_executable, const Executable default_block_executable); Compiler(const Compiler & other) = delete; Compiler(Compiler && other) = delete; @@ -147,11 +160,11 @@ namespace ppu_recompiler_llvm { Compiler & operator = (const Compiler & other) = delete; Compiler & operator = (Compiler && other) = delete; - /// Compile a code fragment and obtain an executable - //Executable Compile(const std::string & name, const CodeFragment & code_fragment); + /// Compile a code fragment described by a cfg and return an executable + Executable Compile(const std::string & name, const ControlFlowGraph & cfg, bool inline_all_blocks, bool generate_linkable_exits, bool generate_trace); 
- /// Free an executable earilier obtained from the Compile function - //void FreeCompiledCodeFragment(Executable executable); + /// Free an executable earlier obtained via a call to Compile + void FreeExecutable(const std::string & name); /// Retrieve compiler stats Stats GetStats(); @@ -563,6 +576,58 @@ namespace ppu_recompiler_llvm { void UNK(const u32 code, const u32 opcode, const u32 gcode) override; private: + /// State of a compilation task + struct CompileTaskState { + enum Args { + ExecutionEngine, + State, + Interpreter, + Tracer, + MaxArgs, + }; + + /// The LLVM function for the compilation task + llvm::Function * function; + + /// Args of the LLVM function + llvm::Value * args[MaxArgs]; + + /// The CFG being compiled + const ControlFlowGraph * cfg; + + /// The current entry of the CFG being compiled + ControlFlowGraph::const_iterator cfg_entry; + + /// Address of the current instruction being compiled + u32 current_instruction_address; + + /// Map from an address to the address of the block that it belongs to + std::unordered_map address_to_block; + + /// A flag used to detect branch instructions. + /// This is set to false at the start of compilation of a block. + /// When a branch instruction is encountered, this is set to true by the decode function.
+ bool hit_branch_instruction; + + /// Indicates whether a block should be inlined even if an already compiled version of the block exists + bool inline_all_blocks; + + /// Create code such that exit points can be linked to other blocks + bool generate_linkable_exits; + + /// Notify the tracer upon exit + bool generate_trace; + }; + + /// Recompilation engine + RecompilationEngine & m_recompilation_engine; + + /// The executable that will be called to process unknown functions + const Executable m_default_function_executable; + + /// The executable that will be called to process unknown blocks + const Executable m_default_block_executable; + /// LLVM context llvm::LLVMContext * m_llvm_context; @@ -578,37 +643,23 @@ namespace ppu_recompiler_llvm { /// Function pass manager llvm::FunctionPassManager * m_fpm; - /// A flag used to detect branch instructions. - /// This is set to false at the start of compilation of a block. - /// When a branch instruction is encountered, this is set to true by the decode function. - bool m_hit_branch_instruction; + /// LLVM type of the functions generated by the compiler + llvm::FunctionType * m_compiled_function_type; - /// The function being compiled - llvm::Function * m_current_function; - - /// The list of next blocks for the current block - const std::vector * m_current_block_next_blocks; - - /// Address of the current instruction - u32 m_current_instruction_address; + /// State of the current compilation task + CompileTaskState m_state; /// Compiler stats Stats m_stats; /// Get the name of the basic block for the specified address - std::string GetBasicBlockNameFromAddress(u32 address); + std::string GetBasicBlockNameFromAddress(u32 address, const std::string & suffix = ""); + + /// Get the address of a basic block from its name + u32 GetAddressFromBasicBlockName(const std::string & name); /// Get the basic block in for the specified address.
- llvm::BasicBlock * GetBasicBlockFromAddress(u32 address, llvm::Function * function, bool create_if_not_exist = false); - - /// Get PPU state pointer argument - llvm::Value * GetPPUStateArg(); - - /// Get interpreter pointer argument - llvm::Value * GetInterpreterArg(); - - /// Get tracer pointer argument - llvm::Value * GetTracerArg(); + llvm::BasicBlock * GetBasicBlockFromAddress(u32 address, const std::string & suffix = "", bool create_if_not_exist = true); /// Get a bit llvm::Value * GetBit(llvm::Value * val, u32 n); @@ -754,8 +805,8 @@ namespace ppu_recompiler_llvm { template llvm::Value * Call(const char * name, Func function, Args... args); - /// Tests if the instruction is a branch instruction or not - bool IsBranchInstruction(u32 instruction); + /// Indirect call + llvm::Value * IndirectCall(u32 address, bool is_function); /// Test an instruction against the interpreter template @@ -778,21 +829,61 @@ namespace ppu_recompiler_llvm { public: virtual ~RecompilationEngine(); - /// Get the ordinal for the specified address - u32 GetOrdinal(u32 address); + /// Allocate an ordinal + u32 AllocateOrdinal(u32 address, bool is_function); - /// Get the executable lookup table - Executable * GetExecutableLookup() const; + /// Get the ordinal for the specified address + u32 GetOrdinal(u32 address) const; + + /// Get the executable specified by the ordinal + const Executable GetExecutable(u32 ordinal) const; + + /// Get the address of the executable lookup + u64 GetAddressOfExecutableLookup() const; /// Notify the recompilation engine about a newly detected trace. It takes ownership of the trace. void NotifyTrace(ExecutionTrace * execution_trace); + /// Log + llvm::raw_fd_ostream & Log(); + void Task() override; /// Get a pointer to the instance of this class static std::shared_ptr GetInstance(); private: + /// Lock for accessing m_pending_execution_traces. TODO: Eliminate this and use a lock-free queue. 
+ std::mutex m_pending_execution_traces_lock; + + /// Queue of execution traces pending processing + std::list m_pending_execution_traces; + + /// Block table + std::unordered_set m_block_table; + + /// Execution traces that have been already encountered. Data is the list of all blocks that this trace includes. + std::unordered_map> m_processed_execution_traces; + + /// Lock for accessing m_address_to_ordinal. + // TODO: Make this a RW lock + mutable std::mutex m_address_to_ordinal_lock; + + /// Mapping from address to ordinal + std::unordered_map m_address_to_ordinal; + + /// Next ordinal to allocate + u32 m_next_ordinal; + + /// PPU Compiler + Compiler m_compiler; + + /// Log + llvm::raw_fd_ostream m_log; + + /// Executable lookup table + Executable m_executable_lookup[10000]; // TODO: Adjust size + RecompilationEngine(); RecompilationEngine(const RecompilationEngine & other) = delete; @@ -808,22 +899,7 @@ namespace ppu_recompiler_llvm { void UpdateControlFlowGraph(ControlFlowGraph & cfg, BlockId block, BlockId next_block); /// Compile a block - void CompileBlock(const BlockEntry & block_entry, bool inline_referenced_blocks); - - /// Lock for accessing m_pending_execution_traces. TODO: Eliminate this and use a lock-free queue. - std::mutex m_pending_execution_traces_lock; - - /// Queue of execution traces pending processing - std::list m_pending_execution_traces; - - /// Block table - std::unordered_set m_block_table; - - /// Execution traces that have been already encountered. Data is the list of all blocks that this trace includes. 
- std::unordered_map> m_processed_execution_traces; - - /// PPU Compiler - Compiler m_compiler; + void CompileBlock(BlockEntry & block_entry, bool inline_referenced_blocks); /// Mutex used to prevent multiple creation static std::mutex s_mutex; @@ -836,7 +912,7 @@ namespace ppu_recompiler_llvm { class Tracer { public: /// Trace type - enum class TraceType { + enum class TraceType : u32 { CallFunction, EnterFunction, ExitFromCompiledFunction, @@ -874,6 +950,7 @@ namespace ppu_recompiler_llvm { /// PPU execution engine class ExecutionEngine : public CPUDecoder { + friend class RecompilationEngine; public: ExecutionEngine(PPUThread & ppu); ExecutionEngine() = delete; @@ -901,23 +978,20 @@ namespace ppu_recompiler_llvm { /// Execution tracer Tracer m_tracer; - /// Executable lookup table - Executable * m_executable_lookup; - /// The time at which the m_address_to_ordinal cache was last cleared - std::chrono::high_resolution_clock::time_point m_last_cache_clear_time; + mutable std::chrono::high_resolution_clock::time_point m_last_cache_clear_time; - /// Address to ordinal lookup. Key is address. Data is the pair (ordinal, times hit). - std::unordered_map> m_address_to_ordinal; + /// Address to ordinal cahce. Key is address. Data is the pair (ordinal, times hit). 
+ mutable std::unordered_map> m_address_to_ordinal; /// Recompilation engine std::shared_ptr m_recompilation_engine; /// Remove unused entries from the m_address_to_ordinal cache - void RemoveUnusedEntriesFromCache(); + void RemoveUnusedEntriesFromCache() const; /// Get the executable for the specified address - Executable GetExecutable(u32 address, Executable default_executable); + Executable GetExecutable(u32 address, Executable default_executable) const; /// Execute a function static u64 ExecuteFunction(ExecutionEngine * execution_engine, PPUThread * ppu_state, PPUInterpreter * interpreter, Tracer * tracer); From 34e34910fd4cac5340396c6d8d1ee3fe7fd11f3c Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sat, 8 Nov 2014 09:52:23 +0530 Subject: [PATCH 06/27] Integrated execution engine, tracer, recompilation engine and compiler --- rpcs3/Emu/CPU/CPUThread.h | 2 + rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 504 ++++++++++++--------------- rpcs3/Emu/Cell/PPULLVMRecompiler.h | 270 +++++++++----- 3 files changed, 425 insertions(+), 351 deletions(-) diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h index 3acb2c12b3..d15af1431d 100644 --- a/rpcs3/Emu/CPU/CPUThread.h +++ b/rpcs3/Emu/CPU/CPUThread.h @@ -113,6 +113,8 @@ public: return temp; } + CPUDecoder * GetDecoder() { return m_dec; }; + public: u32 entry; u32 PC; diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 912d457a1b..1805a9a095 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -26,10 +26,8 @@ using namespace ppu_recompiler_llvm; u64 Compiler::s_rotate_mask[64][64]; bool Compiler::s_rotate_mask_inited = false; -Compiler::Compiler(RecompilationEngine & recompilation_engine, const Executable default_function_executable, const Executable default_block_executable) - : m_recompilation_engine(recompilation_engine) - , m_default_function_executable(default_function_executable) - , 
m_default_block_executable(default_block_executable) { +Compiler::Compiler(RecompilationEngine & recompilation_engine, const Executable unknown_function, const Executable unknown_block) + : m_recompilation_engine(recompilation_engine) { InitializeNativeTarget(); InitializeNativeTargetAsmPrinter(); InitializeNativeTargetDisassembler(); @@ -67,11 +65,18 @@ Compiler::Compiler(RecompilationEngine & recompilation_engine, const Executable m_fpm->doInitialization(); std::vector arg_types; - arg_types.push_back(m_ir_builder->getInt64Ty()->getPointerTo()); - arg_types.push_back(m_ir_builder->getInt64Ty()->getPointerTo()); - arg_types.push_back(m_ir_builder->getInt64Ty()->getPointerTo()); - arg_types.push_back(m_ir_builder->getInt64Ty()->getPointerTo()); - m_compiled_function_type = FunctionType::get(m_ir_builder->getInt64Ty(), arg_types, false); + arg_types.push_back(m_ir_builder->getInt8PtrTy()); + arg_types.push_back(m_ir_builder->getInt8PtrTy()); + arg_types.push_back(m_ir_builder->getInt64Ty()); + m_compiled_function_type = FunctionType::get(m_ir_builder->getInt32Ty(), arg_types, false); + + m_unknown_function = (Function *)m_module->getOrInsertFunction("unknown_function", m_compiled_function_type); + m_unknown_function->setCallingConv(CallingConv::X86_64_Win64); + m_execution_engine->addGlobalMapping(m_unknown_function, unknown_function); + + m_unknown_block = (Function *)m_module->getOrInsertFunction("unknown_block", m_compiled_function_type); + m_unknown_block->setCallingConv(CallingConv::X86_64_Win64); + m_execution_engine->addGlobalMapping(m_unknown_block, unknown_block); if (!s_rotate_mask_inited) { InitRotateMask(); @@ -86,50 +91,53 @@ Compiler::~Compiler() { delete m_llvm_context; } -Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & cfg, bool inline_all_blocks, bool generate_linkable_exits, bool generate_trace) { - assert(!name.empty()); - assert(!cfg.empty()); - +Executable Compiler::Compile(const std::string & name, const 
ControlFlowGraph & cfg, bool inline_all, bool generate_linkable_exits) { auto compilation_start = std::chrono::high_resolution_clock::now(); m_state.cfg = &cfg; - m_state.inline_all_blocks = inline_all_blocks; + m_state.inline_all = inline_all; m_state.generate_linkable_exits = generate_linkable_exits; - m_state.generate_trace = generate_trace; - m_state.address_to_block.clear(); // Create the function m_state.function = (Function *)m_module->getOrInsertFunction(name, m_compiled_function_type); m_state.function->setCallingConv(CallingConv::X86_64_Win64); auto arg_i = m_state.function->arg_begin(); - arg_i->setName("execution_engine"); - m_state.args[CompileTaskState::Args::ExecutionEngine] = arg_i; - (++arg_i)->setName("state"); + arg_i->setName("ppu_state"); m_state.args[CompileTaskState::Args::State] = arg_i; (++arg_i)->setName("interpreter"); m_state.args[CompileTaskState::Args::Interpreter] = arg_i; - (++arg_i)->setName("tracer"); - m_state.args[CompileTaskState::Args::Tracer] = arg_i; + (++arg_i)->setName("context"); + m_state.args[CompileTaskState::Args::Context] = arg_i; // Create the entry block and add code to branch to the first instruction m_ir_builder->SetInsertPoint(GetBasicBlockFromAddress(0)); - m_ir_builder->CreateBr(GetBasicBlockFromAddress(cfg[0].first)); + m_ir_builder->CreateBr(GetBasicBlockFromAddress(cfg.start_address)); - // Convert each block in this CFG to LLVM IR - for (m_state.cfg_entry = cfg.begin(); m_state.cfg_entry != cfg.end(); m_state.cfg_entry++) { - m_state.current_instruction_address = m_state.cfg_entry->first; - auto block = GetBasicBlockFromAddress(m_state.current_instruction_address); - m_ir_builder->SetInsertPoint(block); + // Convert each instruction in the CFG to LLVM IR + std::vector exit_instr_list; + for (auto instr_i = cfg.instruction_addresses.begin(); instr_i != cfg.instruction_addresses.end(); instr_i++) { + m_state.current_instruction_address = *instr_i; + auto instr_bb = 
GetBasicBlockFromAddress(m_state.current_instruction_address); + m_ir_builder->SetInsertPoint(instr_bb); m_state.hit_branch_instruction = false; - if (!inline_all_blocks && m_state.cfg_entry != cfg.begin()) { + if (!inline_all && instr_i != cfg.instruction_addresses.begin()) { // Use an already compiled implementation of this block if available - auto ordinal = m_recompilation_engine.GetOrdinal(m_state.cfg_entry->first); + auto ordinal = m_recompilation_engine.GetOrdinal(*instr_i); if (ordinal != 0xFFFFFFFF) { - auto ret_i64 = IndirectCall(m_state.cfg_entry->first, false); - auto switch_instr = m_ir_builder->CreateSwitch(ret_i64, GetBasicBlockFromAddress(0xFFFFFFFF)); - for (auto i = m_state.cfg_entry->second.begin(); i != m_state.cfg_entry->second.end(); i++) { - switch_instr->addCase(m_ir_builder->getInt64(i->address), GetBasicBlockFromAddress(i->address)); + auto exit_instr_i32 = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 0); + exit_instr_list.push_back(exit_instr_i32); + + auto context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty()); + context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); + auto ret_i32 = IndirectCall(*instr_i, context_i64, false); + + auto switch_instr = m_ir_builder->CreateSwitch(ret_i32, GetBasicBlockFromAddress(0xFFFFFFFF)); + auto branch_i = cfg.branches.find(*instr_i); + if (branch_i != cfg.branches.end()) { + for (auto next_instr_i = branch_i->second.begin(); next_instr_i != branch_i->second.end(); next_instr_i++) { + switch_instr->addCase(m_ir_builder->getInt32(*next_instr_i), GetBasicBlockFromAddress(*next_instr_i)); + } } m_state.hit_branch_instruction = true; @@ -137,7 +145,7 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & } while (!m_state.hit_branch_instruction) { - if (!block->getInstList().empty()) { + if (!instr_bb->getInstList().empty()) { break; } @@ -146,54 +154,51 @@ Executable Compiler::Compile(const std::string & name, 
const ControlFlowGraph & if (!m_state.hit_branch_instruction) { m_state.current_instruction_address += 4; - block = GetBasicBlockFromAddress(m_state.current_instruction_address); - m_ir_builder->CreateBr(block); - m_ir_builder->SetInsertPoint(block); + instr_bb = GetBasicBlockFromAddress(m_state.current_instruction_address); + m_ir_builder->CreateBr(instr_bb); + m_ir_builder->SetInsertPoint(instr_bb); } } } m_recompilation_engine.Log() << *m_state.function; + // Generate exit logic for all empty blocks auto default_exit_block_name = GetBasicBlockNameFromAddress(0xFFFFFFFF); for (auto block_i = m_state.function->begin(); block_i != m_state.function->end(); block_i++) { if (!block_i->getInstList().empty() || block_i->getName() == default_exit_block_name) { continue; } - // An empty block. Generate exit logic. + // Found an empty block m_recompilation_engine.Log() << "Empty block: " << block_i->getName() << "\n"; m_ir_builder->SetInsertPoint(block_i); - auto exit_block_i64 = m_ir_builder->CreatePHI(m_ir_builder->getInt64Ty(), 0); - for (auto i = pred_begin(block_i); i != pred_end(block_i); i++) { - auto pred_address = GetAddressFromBasicBlockName(block_i->getName()); - exit_block_i64->addIncoming(m_ir_builder->getInt64(m_state.address_to_block[pred_address]), *i); - } + auto exit_instr_i32 = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 0); + exit_instr_list.push_back(exit_instr_i32); - auto block_address = GetAddressFromBasicBlockName(block_i->getName()); - SetPc(m_ir_builder->getInt32(block_address)); + auto instr_address = GetAddressFromBasicBlockName(block_i->getName()); + SetPc(m_ir_builder->getInt32(instr_address)); if (generate_linkable_exits) { - if (generate_trace) { - Call("Tracer.Trace", &Tracer::Trace, m_ir_builder->getInt32((uint32_t)Tracer::TraceType::ExitFromCompiledFunction), - m_ir_builder->getInt32(cfg[0].first), m_ir_builder->CreateTrunc(exit_block_i64, m_ir_builder->getInt32Ty())); - } - - auto ret_i64 = IndirectCall(block_address, false); - 
auto cmp_i1 = m_ir_builder->CreateICmpNE(ret_i64, m_ir_builder->getInt64(0)); - auto then_bb = BasicBlock::Create(m_ir_builder->getContext()); - auto merge_bb = BasicBlock::Create(m_ir_builder->getContext()); + auto context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty()); + context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); + auto ret_i32 = IndirectCall(instr_address, context_i64, false); + auto cmp_i1 = m_ir_builder->CreateICmpNE(ret_i32, m_ir_builder->getInt32(0)); + auto then_bb = GetBasicBlockFromAddress(instr_address, "then"); + auto merge_bb = GetBasicBlockFromAddress(instr_address, "merge"); m_ir_builder->CreateCondBr(cmp_i1, then_bb, merge_bb); m_ir_builder->SetInsertPoint(then_bb); - IndirectCall(1, false); + context_i64 = m_ir_builder->CreateZExt(ret_i32, m_ir_builder->getInt64Ty()); + context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); + m_ir_builder->CreateCall3(m_unknown_block, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], context_i64); m_ir_builder->CreateBr(merge_bb); m_ir_builder->SetInsertPoint(merge_bb); - m_ir_builder->CreateRet(m_ir_builder->getInt64(0)); + m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); } else { - m_ir_builder->CreateRet(exit_block_i64); + m_ir_builder->CreateRet(exit_instr_i32); } } @@ -203,33 +208,34 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & auto default_exit_bb = GetBasicBlockFromAddress(0xFFFFFFFF, false); if (default_exit_bb) { m_ir_builder->SetInsertPoint(default_exit_bb); - auto exit_block_i64 = m_ir_builder->CreatePHI(m_ir_builder->getInt64Ty(), 1); - for (auto i = pred_begin(default_exit_bb); i != pred_end(default_exit_bb); i++) { - // the last but one instruction of the predecessor sets the exit block address - auto j = (*i)->rbegin(); - j++; - exit_block_i64->addIncoming(&(*j), *i); - } + auto exit_instr_i32 = 
m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 0); + exit_instr_list.push_back(exit_instr_i32); if (generate_linkable_exits) { - auto cmp_i1 = m_ir_builder->CreateICmpNE(exit_block_i64, m_ir_builder->getInt64(0)); - auto then_bb = BasicBlock::Create(m_ir_builder->getContext()); - auto merge_bb = BasicBlock::Create(m_ir_builder->getContext()); + auto cmp_i1 = m_ir_builder->CreateICmpNE(exit_instr_i32, m_ir_builder->getInt32(0)); + auto then_bb = GetBasicBlockFromAddress(0xFFFFFFFF, "then"); + auto merge_bb = GetBasicBlockFromAddress(0xFFFFFFFF, "merge"); m_ir_builder->CreateCondBr(cmp_i1, then_bb, merge_bb); m_ir_builder->SetInsertPoint(then_bb); - if (generate_trace) { - Call("Tracer.Trace", &Tracer::Trace, m_ir_builder->getInt32((uint32_t)Tracer::TraceType::ExitFromCompiledFunction), - m_ir_builder->getInt32(cfg[0].first), m_ir_builder->CreateTrunc(exit_block_i64, m_ir_builder->getInt32Ty())); - } - - IndirectCall(1, false); + auto context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty()); + context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); + m_ir_builder->CreateCall3(m_unknown_block, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], context_i64); m_ir_builder->CreateBr(merge_bb); m_ir_builder->SetInsertPoint(merge_bb); - m_ir_builder->CreateRet(m_ir_builder->getInt64(0)); + m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); } else { - m_ir_builder->CreateRet(exit_block_i64); + m_ir_builder->CreateRet(exit_instr_i32); + } + } + + // Add incoming values for all exit instr PHI nodes + for (auto exit_instr_i = exit_instr_list.begin(); exit_instr_i != exit_instr_list.end(); exit_instr_i++) { + auto block = (*exit_instr_i)->getParent(); + for (auto pred_i = pred_begin(block); pred_i != pred_end(block); pred_i++) { + auto pred_address = GetAddressFromBasicBlockName((*pred_i)->getName()); + (*exit_instr_i)->addIncoming(m_ir_builder->getInt32(pred_address), 
*pred_i); } } @@ -1575,7 +1581,8 @@ void Compiler::ADDIS(u32 rd, u32 ra, s32 simm16) { void Compiler::BC(u32 bo, u32 bi, s32 bd, u32 aa, u32 lk) { auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_state.current_instruction_address, bd)); - CreateBranch(CheckBranchCondition(bo, bi), target_i64, lk ? true : false); + auto target_i32 = m_ir_builder->CreateTrunc(target_i64, m_ir_builder->getInt32Ty()); + CreateBranch(CheckBranchCondition(bo, bi), target_i32, lk ? true : false); //m_hit_branch_instruction = true; //SetPc(m_ir_builder->getInt32(m_current_instruction_address)); //InterpreterCall("BC", &PPUInterpreter::BC, bo, bi, bd, aa, lk); @@ -1589,7 +1596,8 @@ void Compiler::SC(u32 sc_code) { void Compiler::B(s32 ll, u32 aa, u32 lk) { auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_state.current_instruction_address, ll)); - CreateBranch(nullptr, target_i64, lk ? true : false); + auto target_i32 = m_ir_builder->CreateTrunc(target_i64, m_ir_builder->getInt32Ty()); + CreateBranch(nullptr, target_i32, lk ? true : false); //m_hit_branch_instruction = true; //SetPc(m_ir_builder->getInt32(m_current_instruction_address)); //InterpreterCall("B", &PPUInterpreter::B, ll, aa, lk); @@ -1609,7 +1617,8 @@ void Compiler::MCRF(u32 crfd, u32 crfs) { void Compiler::BCLR(u32 bo, u32 bi, u32 bh, u32 lk) { auto lr_i64 = GetLr(); lr_i64 = m_ir_builder->CreateAnd(lr_i64, ~0x3ULL); - CreateBranch(CheckBranchCondition(bo, bi), lr_i64, lk ? true : false, true); + auto lr_i32 = m_ir_builder->CreateTrunc(lr_i64, m_ir_builder->getInt32Ty()); + CreateBranch(CheckBranchCondition(bo, bi), lr_i32, lk ? 
true : false, true); //m_hit_branch_instruction = true; //SetPc(m_ir_builder->getInt32(m_current_instruction_address)); //InterpreterCall("BCLR", &PPUInterpreter::BCLR, bo, bi, bh, lk); @@ -1710,7 +1719,8 @@ void Compiler::CROR(u32 crbd, u32 crba, u32 crbb) { void Compiler::BCCTR(u32 bo, u32 bi, u32 bh, u32 lk) { auto ctr_i64 = GetCtr(); ctr_i64 = m_ir_builder->CreateAnd(ctr_i64, ~0x3ULL); - CreateBranch(CheckBranchCondition(bo, bi), ctr_i64, lk ? true : false); + auto ctr_i32 = m_ir_builder->CreateTrunc(ctr_i64, m_ir_builder->getInt32Ty()); + CreateBranch(CheckBranchCondition(bo, bi), ctr_i32, lk ? true : false); //m_hit_branch_instruction = true; //SetPc(m_ir_builder->getInt32(m_current_instruction_address)); //InterpreterCall("BCCTR", &PPUInterpreter::BCCTR, bo, bi, bh, lk); @@ -4148,7 +4158,7 @@ void Compiler::UNK(const u32 code, const u32 opcode, const u32 gcode) { //InterpreterCall("UNK", &PPUInterpreter::UNK, code, opcode, gcode); } -std::string Compiler::GetBasicBlockNameFromAddress(u32 address, const std::string & suffix) { +std::string Compiler::GetBasicBlockNameFromAddress(u32 address, const std::string & suffix) const { std::string name; if (address == 0) { @@ -4166,7 +4176,7 @@ std::string Compiler::GetBasicBlockNameFromAddress(u32 address, const std::strin return name; } -u32 Compiler::GetAddressFromBasicBlockName(const std::string & name) { +u32 Compiler::GetAddressFromBasicBlockName(const std::string & name) const { if (name.compare(0, 6, "instr_") == 0) { return strtoul(name.c_str() + 6, nullptr, 0); } else if (name == GetBasicBlockNameFromAddress(0)) { @@ -4585,7 +4595,7 @@ Value * Compiler::CheckBranchCondition(u32 bo, u32 bi) { return cmp_i1; } -void Compiler::CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i64, bool lk, bool target_is_lr) { +void Compiler::CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i32, bool lk, bool target_is_lr) { if (lk) { SetLr(m_ir_builder->getInt64(m_state.current_instruction_address + 4)); } @@ 
-4593,18 +4603,18 @@ void Compiler::CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i64, bool auto current_block = m_ir_builder->GetInsertBlock(); BasicBlock * target_block = nullptr; - if (dyn_cast(target_i64)) { + if (dyn_cast(target_i32)) { // Target address is an immediate value. - u32 target_address = (u32)(dyn_cast(target_i64)->getLimitedValue()); + u32 target_address = (u32)(dyn_cast(target_i32)->getLimitedValue()); if (lk) { // Function call if (cmp_i1) { // There is no need to create a new block for an unconditional jump - target_block = BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function); + target_block = GetBasicBlockFromAddress(m_state.current_instruction_address, "target"); m_ir_builder->SetInsertPoint(target_block); } - SetPc(target_i64); - IndirectCall(target_address, true); + SetPc(target_i32); + IndirectCall(target_address, m_ir_builder->getInt64(0), true); m_ir_builder->CreateBr(GetBasicBlockFromAddress(m_state.current_instruction_address + 4)); } else { // Local branch @@ -4613,34 +4623,40 @@ void Compiler::CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i64, bool } else { // Target address is in a register if (cmp_i1) { // There is no need to create a new block for an unconditional jump - target_block = BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function); + target_block = GetBasicBlockFromAddress(m_state.current_instruction_address, "target"); m_ir_builder->SetInsertPoint(target_block); } - SetPc(target_i64); - + SetPc(target_i32); if (target_is_lr && !lk) { - // Return from function call - m_ir_builder->CreateRet(m_ir_builder->getInt64(0)); + // Return from this function + m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); } else if (lk) { - auto next_block = GetBasicBlockFromAddress(m_state.current_instruction_address + 4); - auto call_block = BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function); - m_ir_builder->CreateOr(m_ir_builder->getInt64(m_state.cfg_entry->first), 
(uint64_t)0); - auto switch_instr = m_ir_builder->CreateSwitch(target_i64, call_block); - m_ir_builder->SetInsertPoint(call_block); - IndirectCall(0, true); + auto next_block = GetBasicBlockFromAddress(m_state.current_instruction_address + 4); + auto unknown_function_block = GetBasicBlockFromAddress(m_state.current_instruction_address, "unknown_function"); + + auto switch_instr = m_ir_builder->CreateSwitch(target_i32, unknown_function_block); + m_ir_builder->SetInsertPoint(unknown_function_block); + m_ir_builder->CreateCall3(m_unknown_function, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], m_ir_builder->getInt64(0)); m_ir_builder->CreateBr(next_block); - for (auto i = m_state.cfg_entry->second.begin(); i != m_state.cfg_entry->second.end(); i++) { - call_block = BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function); - m_ir_builder->SetInsertPoint(call_block); - IndirectCall(i->address, true); - m_ir_builder->CreateBr(next_block); - switch_instr->addCase(m_ir_builder->getInt32(i->address), call_block); + + auto call_i = m_state.cfg->calls.find(m_state.current_instruction_address); + if (call_i != m_state.cfg->calls.end()) { + for (auto function_i = call_i->second.begin(); function_i != call_i->second.end(); function_i++) { + auto block = GetBasicBlockFromAddress(m_state.current_instruction_address, fmt::Format("0x%08X", *function_i)); + m_ir_builder->SetInsertPoint(block); + IndirectCall(*function_i, m_ir_builder->getInt64(0), true); + m_ir_builder->CreateBr(next_block); + switch_instr->addCase(m_ir_builder->getInt32(*function_i), block); + } } } else { - auto switch_instr = m_ir_builder->CreateSwitch(target_i64, GetBasicBlockFromAddress(0xFFFFFFFF)); - for (auto i = m_state.cfg_entry->second.begin(); i != m_state.cfg_entry->second.end(); i++) { - switch_instr->addCase(m_ir_builder->getInt64(i->address), GetBasicBlockFromAddress(i->address)); + auto switch_instr = m_ir_builder->CreateSwitch(target_i32, 
GetBasicBlockFromAddress(0xFFFFFFFF)); + auto branch_i = m_state.cfg->branches.find(m_state.current_instruction_address); + if (branch_i != m_state.cfg->branches.end()) { + for (auto next_instr_i = branch_i->second.begin(); next_instr_i != branch_i->second.end(); next_instr_i++) { + switch_instr->addCase(m_ir_builder->getInt32(*next_instr_i), GetBasicBlockFromAddress(*next_instr_i)); + } } } } @@ -4820,14 +4836,11 @@ Value * Compiler::Call(const char * name, Func function, Args... args) { return m_ir_builder->CreateCall(fn, fn_args); } -llvm::Value * Compiler::IndirectCall(u32 address, bool is_function) { +llvm::Value * Compiler::IndirectCall(u32 address, Value * context_i64, bool is_function) { auto ordinal = m_recompilation_engine.AllocateOrdinal(address, is_function); auto executable_addr_i64 = m_ir_builder->getInt64(m_recompilation_engine.GetAddressOfExecutableLookup() + (ordinal * sizeof(u64))); auto executable_ptr = m_ir_builder->CreateIntToPtr(executable_addr_i64, m_compiled_function_type); - return m_ir_builder->CreateCall4(executable_ptr, m_state.args[CompileTaskState::Args::ExecutionEngine], - m_state.args[CompileTaskState::Args::State], - m_state.args[CompileTaskState::Args::Interpreter], - m_state.args[CompileTaskState::Args::Tracer]); + return m_ir_builder->CreateCall3(executable_ptr, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], context_i64); } void Compiler::InitRotateMask() { @@ -4986,7 +4999,7 @@ void RecompilationEngine::Task() { } void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution_trace) { - auto execution_trace_id = GetExecutionTraceId(execution_trace); + auto execution_trace_id = execution_trace.GetId(); auto processed_execution_trace_i = m_processed_execution_traces.find(execution_trace_id); if (processed_execution_trace_i == m_processed_execution_traces.end()) { Log() << "Trace: " << execution_trace.ToString() << "\n"; @@ -4995,38 +5008,33 @@ void 
RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution auto split_trace = false; auto block_i = m_block_table.end(); - auto trace_block_i = execution_trace.blocks.begin(); - for (; trace_block_i != execution_trace.blocks.end(); trace_block_i++) { - if (trace_block_i->type == BlockId::Type::Exit) { + for (auto trace_i = execution_trace.entries.begin(); trace_i != execution_trace.entries.end(); trace_i++) { + if (trace_i->type == ExecutionTraceEntry::Type::CompiledBlock) { block_i = m_block_table.end(); split_trace = true; - } else if (block_i == m_block_table.end()) { - BlockEntry key(trace_block_i->address); + } + + if (block_i == m_block_table.end()) { + BlockEntry key(trace_i->GetPrimaryAddress(), execution_trace.function_address); block_i = m_block_table.find(&key); if (block_i == m_block_table.end()) { - block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.address)); + block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.cfg.start_address, key.cfg.function_address)); } - (*block_i)->is_function_start = key.address == execution_trace.function_address; tmp_block_list.push_back(*block_i); } - if (block_i != m_block_table.end()) { - BlockId next_block; - if (trace_block_i + 1 != execution_trace.blocks.end()) { - next_block = *(trace_block_i + 1); - } else { - if (!split_trace && execution_trace.type == ExecutionTrace::Type::Loop) { - next_block = *(execution_trace.blocks.begin()); - } else { - next_block.address = 0; - next_block.type = BlockId::Type::Exit; - } + const ExecutionTraceEntry * next_trace = nullptr; + if (trace_i + 1 != execution_trace.entries.end()) { + next_trace = &(*(trace_i + 1)); + } else if (!split_trace && execution_trace.type == ExecutionTrace::Type::Loop) { + if (!split_trace && execution_trace.type == ExecutionTrace::Type::Loop) { + next_trace = &(*(execution_trace.entries.begin())); } - - UpdateControlFlowGraph((*block_i)->cfg, *trace_block_i, next_block); } + + 
UpdateControlFlowGraph((*block_i)->cfg, *trace_i, next_trace); } processed_execution_trace_i = m_processed_execution_traces.insert(m_processed_execution_traces.end(), std::make_pair(execution_trace_id, std::move(tmp_block_list))); @@ -5045,24 +5053,26 @@ void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution std::remove_if(processed_execution_trace_i->second.begin(), processed_execution_trace_i->second.end(), [](const BlockEntry * b)->bool { return b->is_compiled; }); } -void RecompilationEngine::UpdateControlFlowGraph(ControlFlowGraph & cfg, BlockId block, BlockId next_block) { - if (block.type == BlockId::Type::Exit && next_block.type == BlockId::Type::Exit) { - return; - } +void RecompilationEngine::UpdateControlFlowGraph(ControlFlowGraph & cfg, const ExecutionTraceEntry & this_entry, const ExecutionTraceEntry * next_entry) { + if (this_entry.type == ExecutionTraceEntry::Type::Instruction) { + cfg.instruction_addresses.insert(this_entry.GetPrimaryAddress()); - if (block.type == BlockId::Type::FunctionCall) { - return; - } - - auto block_i = std::find_if(cfg.begin(), cfg.end(), [&block](const ControlFlowGraph::value_type & v)->bool { return v.first == block.address; }); - if (block.type == BlockId::Type::Normal && block_i == cfg.end()) { - block_i = cfg.insert(cfg.end(), std::make_pair(block.address, std::vector())); - } - - if (block_i != cfg.end() && next_block.address && next_block.type != BlockId::Type::Exit) { - auto next_block_i = std::find(block_i->second.begin(), block_i->second.end(), next_block); - if (next_block_i == block_i->second.end()) { - block_i->second.push_back(next_block); + if (next_entry) { + if (next_entry->type == ExecutionTraceEntry::Type::Instruction || next_entry->type == ExecutionTraceEntry::Type::CompiledBlock) { + if (next_entry->GetPrimaryAddress() != (this_entry.GetPrimaryAddress() + 4)) { + cfg.branches[this_entry.GetPrimaryAddress()].insert(next_entry->GetPrimaryAddress()); + } + } else if 
(next_entry->type == ExecutionTraceEntry::Type::FunctionCall) { + cfg.calls[this_entry.instruction.address].insert(next_entry->GetPrimaryAddress()); + } + } + } else if (this_entry.type == ExecutionTraceEntry::Type::CompiledBlock) { + if (next_entry) { + if (next_entry->type == ExecutionTraceEntry::Type::Instruction || next_entry->type == ExecutionTraceEntry::Type::CompiledBlock) { + cfg.branches[this_entry.compiled_block.exit_address].insert(next_entry->GetPrimaryAddress()); + } else if (next_entry->type == ExecutionTraceEntry::Type::FunctionCall) { + cfg.calls[this_entry.compiled_block.exit_address].insert(next_entry->GetPrimaryAddress()); + } } } } @@ -5070,11 +5080,11 @@ void RecompilationEngine::UpdateControlFlowGraph(ControlFlowGraph & cfg, BlockId void RecompilationEngine::CompileBlock(BlockEntry & block_entry, bool inline_referenced_blocks) { Log() << "Compile: " << block_entry.ToString() << "\n"; - auto ordinal = AllocateOrdinal(block_entry.address, block_entry.is_function_start); - auto executable = m_compiler.Compile(fmt::Format("fn_0x%08X_%u", block_entry.address, block_entry.revision++), block_entry.cfg, - block_entry.is_function_start ? false : true /*inline_all_blocks*/, - block_entry.is_function_start ? true : false /*generate_linkable_exits*/, - block_entry.is_function_start ? true : false /*generate_trace*/); + auto is_funciton = block_entry.cfg.start_address == block_entry.cfg.function_address; + auto ordinal = AllocateOrdinal(block_entry.cfg.start_address, is_funciton); + auto executable = m_compiler.Compile(fmt::Format("fn_0x%08X_%u", block_entry.cfg.start_address, block_entry.revision++), block_entry.cfg, + is_funciton ? false : true /*inline_all*/, + is_funciton ? 
true : false /*generate_linkable_exits*/); m_executable_lookup[ordinal] = executable; } @@ -5090,7 +5100,6 @@ std::shared_ptr RecompilationEngine::GetInstance() { Tracer::Tracer() : m_recompilation_engine(RecompilationEngine::GetInstance()) { - m_trace.reserve(1000); m_stack.reserve(100); } @@ -5100,81 +5109,59 @@ Tracer::~Tracer() { void Tracer::Trace(TraceType trace_type, u32 arg1, u32 arg2) { ExecutionTrace * execution_trace = nullptr; - BlockId block_id; - int function; switch (trace_type) { case TraceType::CallFunction: // arg1 is address of the function - block_id.address = arg1; - block_id.type = BlockId::Type::FunctionCall; - m_trace.push_back(block_id); + m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::FunctionCall, arg1)); break; case TraceType::EnterFunction: - // No args used - m_stack.push_back((u32)m_trace.size()); + // arg1 is address of the function + m_stack.push_back(new ExecutionTrace(arg1)); break; case TraceType::ExitFromCompiledFunction: // arg1 is address of function. - // arg2 is the address of the exit block. - block_id.address = arg1; - block_id.type = BlockId::Type::Normal; - m_stack.push_back((u32)m_trace.size()); - m_trace.push_back(block_id); - - block_id.address = arg2; - block_id.type = BlockId::Type::Exit; - m_trace.push_back(block_id); + // arg2 is the address of the exit instruction. 
+ if (arg2) { + m_stack.push_back(new ExecutionTrace(arg1)); + m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::CompiledBlock, arg1, arg2)); + } break; case TraceType::Return: // No args used - function = m_stack.back(); + execution_trace = m_stack.back(); + execution_trace->type = ExecutionTrace::Type::Linear; m_stack.pop_back(); - - execution_trace = new ExecutionTrace(); - execution_trace->type = ExecutionTrace::Type::Linear; - execution_trace->function_address = m_trace[function].address; - std::copy(m_trace.begin() + function, m_trace.end(), std::back_inserter(execution_trace->blocks)); - m_trace.erase(m_trace.begin() + function, m_trace.end()); break; - case TraceType::EnterBlock: - // arg1 is address. Other args are not used. - function = m_stack.back(); - for (int i = (int)m_trace.size() - 1; i >= function; i--) { - if (m_trace[i].address == arg1 && m_trace[i].type == BlockId::Type::Normal) { - // Found a loop within the current function - execution_trace = new ExecutionTrace(); - execution_trace->type = ExecutionTrace::Type::Loop; - execution_trace->function_address = m_trace[function].address; - std::copy(m_trace.begin() + i, m_trace.end(), std::back_inserter(execution_trace->blocks)); - m_trace.erase(m_trace.begin() + i + 1, m_trace.end()); + case TraceType::Instruction: + // arg1 is the address of the instruction + for (int i = (int)m_stack.back()->entries.size() - 1; i >= 0; i--) { + if ((m_stack.back()->entries[i].type == ExecutionTraceEntry::Type::Instruction && m_stack.back()->entries[i].instruction.address == arg1) || + (m_stack.back()->entries[i].type == ExecutionTraceEntry::Type::CompiledBlock && m_stack.back()->entries[i].compiled_block.entry_address == arg1)) { + // Found a loop + execution_trace = new ExecutionTrace(m_stack.back()->function_address); + execution_trace->type = ExecutionTrace::Type::Loop; + std::copy(m_stack.back()->entries.begin() + i, m_stack.back()->entries.end(), 
std::back_inserter(execution_trace->entries)); + m_stack.back()->entries.erase(m_stack.back()->entries.begin() + i + 1, m_stack.back()->entries.end()); break; } } if (!execution_trace) { // A loop was not found - block_id.address = arg1; - block_id.type = BlockId::Type::Normal; - m_trace.push_back(block_id); + m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::Instruction, arg1)); } break; case TraceType::ExitFromCompiledBlock: - // arg1 is address of the exit block. - block_id.address = arg1; - block_id.type = BlockId::Type::Exit; - m_trace.push_back(block_id); + // arg1 is address of the compiled block. + // arg2 is the address of the exit instruction. + m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::CompiledBlock, arg1, arg2)); - if (arg1 == 0) { + if (arg2 == 0) { // Return from function - function = m_stack.back(); + execution_trace = m_stack.back(); + execution_trace->type = ExecutionTrace::Type::Linear; m_stack.pop_back(); - - execution_trace = new ExecutionTrace(); - execution_trace->type = ExecutionTrace::Type::Linear; - execution_trace->function_address = m_trace[function].address; - std::copy(m_trace.begin() + function, m_trace.end(), std::back_inserter(execution_trace->blocks)); - m_trace.erase(m_trace.begin() + function, m_trace.end()); } break; default: @@ -5204,7 +5191,7 @@ ppu_recompiler_llvm::ExecutionEngine::~ExecutionEngine() { } u8 ppu_recompiler_llvm::ExecutionEngine::DecodeMemory(const u32 address) { - ExecuteFunction(this, &m_ppu, m_interpreter, &m_tracer); + ExecuteFunction(&m_ppu, m_interpreter, 0); return 0; } @@ -5245,14 +5232,20 @@ Executable ppu_recompiler_llvm::ExecutionEngine::GetExecutable(u32 address, Exec return executable; } -u64 ppu_recompiler_llvm::ExecutionEngine::ExecuteFunction(ExecutionEngine * execution_engine, PPUThread * ppu_state, PPUInterpreter * interpreter, Tracer * tracer) { - tracer->Trace(Tracer::TraceType::EnterFunction, 0, 0); - return 
ExecuteTillReturn(execution_engine, ppu_state, interpreter, tracer); +u32 ppu_recompiler_llvm::ExecutionEngine::ExecuteFunction(PPUThread * ppu_state, PPUInterpreter * interpreter, u64 context) { + auto execution_engine = (ExecutionEngine *)ppu_state->GetDecoder(); + execution_engine->m_tracer.Trace(Tracer::TraceType::EnterFunction, ppu_state->PC, 0); + return ExecuteTillReturn(ppu_state, interpreter, 0); } -u64 ppu_recompiler_llvm::ExecutionEngine::ExecuteTillReturn(ExecutionEngine * execution_engine, PPUThread * ppu_state, PPUInterpreter * interpreter, Tracer * tracer) { - bool terminate = false; - bool returned = true; +u32 ppu_recompiler_llvm::ExecutionEngine::ExecuteTillReturn(PPUThread * ppu_state, PPUInterpreter * interpreter, u64 context) { + auto execution_engine = (ExecutionEngine *)ppu_state->GetDecoder(); + auto terminate = false; + auto branch_type = BranchType::NonBranch; + + if (context) { + execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledFunction, context >> 32, context & 0xFFFFFFFF); + } while (!terminate && !Emu.IsStopped()) { if (Emu.IsPaused()) { @@ -5260,68 +5253,45 @@ u64 ppu_recompiler_llvm::ExecutionEngine::ExecuteTillReturn(ExecutionEngine * ex continue; } - BranchType branch_type; - if (!returned) { + auto executable = execution_engine->GetExecutable(ppu_state->PC, ExecuteTillReturn); + if (executable != ExecuteTillReturn) { + auto entry = ppu_state->PC; + auto exit = (u32)executable(ppu_state, interpreter, 0); + execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledBlock, entry, exit); + if (exit == 0) { + terminate = true; + } + } else { + execution_engine->m_tracer.Trace(Tracer::TraceType::Instruction, ppu_state->PC, 0); auto instruction = re32(vm::get_ref(ppu_state->PC)); execution_engine->m_decoder.Decode(instruction); branch_type = ppu_state->m_is_branch ? 
GetBranchTypeFromInstruction(instruction) : BranchType::NonBranch; ppu_state->NextPc(4); - } else { - returned = false; - branch_type = BranchType::LocalBranch; - } - Executable executable; - switch (branch_type) { - case BranchType::Return: - tracer->Trace(Tracer::TraceType::Return, 0, 0); - terminate = true; - break; - case BranchType::FunctionCall: - tracer->Trace(Tracer::TraceType::CallFunction, ppu_state->PC, 0); - executable = execution_engine->GetExecutable(ppu_state->PC, ExecuteFunction); - executable(execution_engine, ppu_state, interpreter, tracer); - returned = true; - break; - case BranchType::LocalBranch: - tracer->Trace(Tracer::TraceType::EnterBlock, ppu_state->PC, 0); - executable = execution_engine->GetExecutable(ppu_state->PC, nullptr); - if (executable != nullptr) { - auto exit_block = executable(execution_engine, ppu_state, interpreter, tracer); - tracer->Trace(Tracer::TraceType::ExitFromCompiledBlock, (u32)exit_block, 0); - if (exit_block == 0) { - terminate = true; - } + switch (branch_type) { + case BranchType::Return: + execution_engine->m_tracer.Trace(Tracer::TraceType::Return, 0, 0); + terminate = true; + break; + case BranchType::FunctionCall: + execution_engine->m_tracer.Trace(Tracer::TraceType::CallFunction, ppu_state->PC, 0); + executable = execution_engine->GetExecutable(ppu_state->PC, ExecuteFunction); + executable(ppu_state, interpreter, 0); + break; + case BranchType::LocalBranch: + break; + case BranchType::NonBranch: + break; + default: + assert(0); + break; } - break; - case BranchType::NonBranch: - break; - default: - assert(0); - break; } } return 0; } -std::string ppu_recompiler_llvm::ControlFlowGraphToString(const ControlFlowGraph & cfg) { - std::string s; - - for (auto i = cfg.begin(); i != cfg.end(); i++) { - s += fmt::Format("0x%08X ->", i->first); - for (auto j = i->second.begin(); j != i->second.end(); j++) { - s += " " + j->ToString(); - } - - if (i != (cfg.end() - 1)) { - s += "\n"; - } - } - - return s; -} - 
BranchType ppu_recompiler_llvm::GetBranchTypeFromInstruction(u32 instruction) { auto type = BranchType::NonBranch; auto field1 = instruction >> 26; @@ -5340,13 +5310,3 @@ BranchType ppu_recompiler_llvm::GetBranchTypeFromInstruction(u32 instruction) { return type; } - -ExecutionTraceId ppu_recompiler_llvm::GetExecutionTraceId(const ExecutionTrace & execution_trace) { - ExecutionTraceId id = 0; - - for (auto i = execution_trace.blocks.begin(); i != execution_trace.blocks.end(); i++) { - id = (id << 8) ^ ((u64)i->address << 32 | _byteswap_ulong((u64)i->address)); - } - - return id; -} diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h index 856ecddc72..963e22c8af 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.h +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h @@ -19,45 +19,106 @@ namespace ppu_recompiler_llvm { class ExecutionEngine; struct PPUState; - enum class BranchType { - NonBranch, - LocalBranch, - FunctionCall, - Return, - }; + /// An entry in an execution trace + struct ExecutionTraceEntry { + /// Data associated with the entry. This is discriminated by type. 
+ union { + struct Instruction { + u32 address; + } instruction; - /// Unique id of a block - struct BlockId { - /// Address of the block - u32 address; + struct FunctionCall { + u32 address; + } function_call; - /// The type of the block + struct CompiledBlock { + u32 entry_address; + u32 exit_address; + } compiled_block; + }; + + /// The type of the entry enum class Type { FunctionCall, - Normal, - Exit, + Instruction, + CompiledBlock, } type; - bool operator == (const BlockId & other) const { - return (address == other.address && type == other.type); + ExecutionTraceEntry(Type type, u32 arg1, u32 arg2 = 0) + : type(type) { + switch (type) { + case Type::Instruction: + instruction.address = arg1; + break; + case Type::FunctionCall: + function_call.address = arg1; + break; + case Type::CompiledBlock: + compiled_block.entry_address = arg1; + compiled_block.exit_address = arg2; + break; + default: + assert(0); + break; + } + } + + u32 GetPrimaryAddress() const { + switch (type) { + case Type::Instruction: + return instruction.address; + case Type::FunctionCall: + return function_call.address; + case Type::CompiledBlock: + return compiled_block.entry_address; + default: + assert(0); + return 0; + } } std::string ToString() const { - return fmt::Format("%c:0x%08X", type == BlockId::Type::Normal ? 'N' : type == BlockId::Type::FunctionCall ? 
'F' : 'E', address); + switch (type) { + case Type::Instruction: + return fmt::Format("I:0x%08X", instruction.address); + case Type::FunctionCall: + return fmt::Format("F:0x%08X", function_call.address); + case Type::CompiledBlock: + return fmt::Format("C:0x%08X-0x%08X", compiled_block.entry_address, compiled_block.exit_address); + default: + assert(0); + return ""; + } + } + + u64 hash() const { + u64 hash = ((u64)type << 32); + switch (type) { + case Type::Instruction: + hash |= instruction.address; + break; + case Type::FunctionCall: + hash |= function_call.address; + break; + case Type::CompiledBlock: + hash = compiled_block.exit_address; + hash <<= 32; + hash |= compiled_block.entry_address; + break; + default: + assert(0); + break; + } + + return hash; } }; - /// Control flow graph of a block. A list of (block address, list of next blocks) pairs. - typedef std::vector>> ControlFlowGraph; - - /// Get a string representation of a ControlFlowGraph - std::string ControlFlowGraphToString(const ControlFlowGraph & cfg); - - /// Uniquely identifies an execution trace - typedef u64 ExecutionTraceId; - /// An execution trace. struct ExecutionTrace { + /// Unique id of an execution trace; + typedef u64 Id; + /// The function to which this trace belongs u32 function_address; @@ -67,27 +128,98 @@ namespace ppu_recompiler_llvm { Loop, } type; - /// Sequence of blocks enountered in this trace - std::vector blocks; + /// entries in the trace + std::vector entries; + + ExecutionTrace(u32 address) + : function_address(address) { + } std::string ToString() const { auto s = fmt::Format("0x%08X %s ->", function_address, type == ExecutionTrace::Type::Loop ? 
"Loop" : "Linear"); - for (auto i = 0; i < blocks.size(); i++) { - s += " " + blocks[i].ToString(); + for (auto i = 0; i < entries.size(); i++) { + s += " " + entries[i].ToString(); + } + + return s; + } + + Id GetId() const { + Id id = 0; + + for (auto i = entries.begin(); i != entries.end(); i++) { + id ^= i->hash(); + id <<= 1; + } + + return id; + } + }; + + /// A control flow graph + struct ControlFlowGraph { + /// Address of the first instruction + u32 start_address; + + /// Address of the function to which this CFG belongs to + u32 function_address; + + /// Set of addresses of the instructions in the CFG + std::set instruction_addresses; + + /// Branches in the CFG. + /// Key is the address of an instruction + /// Data is the set of all instructions to which this instruction branches to. + std::map> branches; + + /// Function calls in the CFG + /// Key is the address of an instruction + /// Data is the set of all functions which this instruction invokes. + std::map> calls; + + ControlFlowGraph(u32 start_address, u32 function_address) + : start_address(start_address) + , function_address(function_address) { + } + + std::string ToString() const { + auto s = fmt::Format("0x%08X (0x%08X):", start_address, function_address); + for (auto i = instruction_addresses.begin(); i != instruction_addresses.end(); i++) { + s += fmt::Format(" 0x%08X", *i); + } + + s += "\nBranches:"; + for (auto i = branches.begin(); i != branches.end(); i++) { + s += fmt::Format("\n0x%08X ->", i->first); + for (auto j = i->second.begin(); j != i->second.end(); j++) { + s += fmt::Format(" 0x%08X", *j); + } + } + + s += "\nCalls:"; + for (auto i = calls.begin(); i != calls.end(); i++) { + s += fmt::Format("\n0x%08X ->", i->first); + for (auto j = i->second.begin(); j != i->second.end(); j++) { + s += fmt::Format(" 0x%08X", *j); + } } return s; } }; + enum class BranchType { + NonBranch, + LocalBranch, + FunctionCall, + Return, + }; + /// Pointer to an executable - typedef 
u64(*Executable)(ExecutionEngine * execution_engine, PPUThread * ppu_state, PPUInterpreter * interpreter, Tracer * tracer); + typedef u32(*Executable)(PPUThread * ppu_state, PPUInterpreter * interpreter, u64 context); /// An entry in the block table struct BlockEntry { - /// Address of the block - u32 address; - /// Number of times this block was hit u32 num_hits; @@ -100,23 +232,19 @@ namespace ppu_recompiler_llvm { /// Indicates whether the block has been compiled or not bool is_compiled; - /// Indicates whether the block is the first block of a function or not - bool is_function_start; - - BlockEntry(u32 addr) - : address(addr) - , num_hits(0) + BlockEntry(u32 start_address, u32 function_address) + : num_hits(0) , revision(0) - , is_compiled(false) { + , is_compiled(false) + , cfg(start_address, function_address) { } std::string ToString() const { - return fmt::Format("%c:0x%08X, NumHits=%u, IsCompiled=%c\n%s", is_function_start ? 'F' : 'N', address, num_hits, - is_compiled ? 'Y' : 'N', ControlFlowGraphToString(cfg).c_str()); + return fmt::Format("%s\nNumHits=%u, Revision=%u, IsCompiled=%c\n", cfg.ToString().c_str(), num_hits, revision, is_compiled ? 
'Y' : 'N'); } bool operator == (const BlockEntry & other) const { - return address == other.address; + return cfg.start_address == other.cfg.start_address; } }; } @@ -124,7 +252,7 @@ namespace ppu_recompiler_llvm { namespace std { template<> struct hash { size_t operator()(const ppu_recompiler_llvm::BlockEntry * e) const { - return e->address; + return e->cfg.start_address; } }; } @@ -150,7 +278,7 @@ namespace ppu_recompiler_llvm { std::map interpreter_fallback_stats; }; - Compiler(RecompilationEngine & recompilation_engine, const Executable default_function_executable, const Executable default_block_executable); + Compiler(RecompilationEngine & recompilation_engine, const Executable unknown_function, const Executable unknown_block); Compiler(const Compiler & other) = delete; Compiler(Compiler && other) = delete; @@ -161,7 +289,7 @@ namespace ppu_recompiler_llvm { Compiler & operator = (Compiler && other) = delete; /// Compile a code fragment described by a cfg and return an executable - Executable Compile(const std::string & name, const ControlFlowGraph & cfg, bool inline_all_blocks, bool generate_linkable_exits, bool generate_trace); + Executable Compile(const std::string & name, const ControlFlowGraph & cfg, bool inline_all_blocks, bool generate_linkable_exits); /// Free an executable earilier obtained via a call to Compile void FreeExecutable(const std::string & name); @@ -579,10 +707,9 @@ namespace ppu_recompiler_llvm { /// State of a compilation task struct CompileTaskState { enum Args { - ExecutionEngine, State, Interpreter, - Tracer, + Context, MaxArgs, }; @@ -595,38 +722,29 @@ namespace ppu_recompiler_llvm { /// The CFG being compiled const ControlFlowGraph * cfg; - /// The current entry of the CFG being compiled - ControlFlowGraph::const_iterator cfg_entry; - /// Address of the current instruction being compiled u32 current_instruction_address; - /// Map from an address to the address of the block that it belongs to - std::unordered_map address_to_block; 
- /// A flag used to detect branch instructions. - /// This is set to false at the start of compilation of a block. - /// When a branch instruction is encountered, this is set to true by the decode function. + /// This is set to false at the start of compilation of an instruction. + /// If a branch instruction is encountered, this is set to true by the decode function. bool hit_branch_instruction; /// Indicates whether a block should be inlined even if an already compiled version of the block exists - bool inline_all_blocks; + bool inline_all; /// Create code such that exit points can be linked to other blocks bool generate_linkable_exits; - - /// Notify the tracer upon exit - bool generate_trace; }; /// Recompilation engine RecompilationEngine & m_recompilation_engine; - /// The executable that will be called to process unknown functions - const Executable m_default_function_executable; + /// The function that will be called to process unknown functions + llvm::Function * m_unknown_function; /// The executable that will be called to process unknown blocks - const Executable m_default_block_executable; + llvm::Function * m_unknown_block; /// LLVM context llvm::LLVMContext * m_llvm_context; @@ -653,10 +771,10 @@ namespace ppu_recompiler_llvm { Stats m_stats; /// Get the name of the basic block for the specified address - std::string GetBasicBlockNameFromAddress(u32 address, const std::string & suffix = ""); + std::string GetBasicBlockNameFromAddress(u32 address, const std::string & suffix = "") const; /// Get the address of a basic block from its name - u32 GetAddressFromBasicBlockName(const std::string & name); + u32 GetAddressFromBasicBlockName(const std::string & name) const; /// Get the basic block in for the specified address. 
llvm::BasicBlock * GetBasicBlockFromAddress(u32 address, const std::string & suffix = "", bool create_if_not_exist = true); @@ -785,7 +903,7 @@ namespace ppu_recompiler_llvm { llvm::Value * CheckBranchCondition(u32 bo, u32 bi); /// Create IR for a branch instruction - void CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i64, bool lk, bool target_is_lr = false); + void CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i32, bool lk, bool target_is_lr = false); /// Read from memory llvm::Value * ReadMemory(llvm::Value * addr_i64, u32 bits, u32 alignment = 0, bool bswap = true, bool could_be_mmio = true); @@ -806,7 +924,7 @@ namespace ppu_recompiler_llvm { llvm::Value * Call(const char * name, Func function, Args... args); /// Indirect call - llvm::Value * IndirectCall(u32 address, bool is_function); + llvm::Value * IndirectCall(u32 address, llvm::Value * context_i64, bool is_function); /// Test an instruction against the interpreter template @@ -863,7 +981,7 @@ namespace ppu_recompiler_llvm { std::unordered_set m_block_table; /// Execution traces that have been already encountered. Data is the list of all blocks that this trace includes. - std::unordered_map> m_processed_execution_traces; + std::unordered_map> m_processed_execution_traces; /// Lock for accessing m_address_to_ordinal. 
// TODO: Make this a RW lock @@ -896,7 +1014,7 @@ namespace ppu_recompiler_llvm { void ProcessExecutionTrace(const ExecutionTrace & execution_trace); /// Update a CFG - void UpdateControlFlowGraph(ControlFlowGraph & cfg, BlockId block, BlockId next_block); + void UpdateControlFlowGraph(ControlFlowGraph & cfg, const ExecutionTraceEntry & this_entry, const ExecutionTraceEntry * next_entry); /// Compile a block void CompileBlock(BlockEntry & block_entry, bool inline_referenced_blocks); @@ -917,7 +1035,7 @@ namespace ppu_recompiler_llvm { EnterFunction, ExitFromCompiledFunction, Return, - EnterBlock, + Instruction, ExitFromCompiledBlock, }; @@ -938,11 +1056,8 @@ namespace ppu_recompiler_llvm { void Terminate(); private: - /// Current execution trace - std::vector m_trace; - /// Call stack - std::vector m_stack; + std::vector m_stack; /// Recompilation engine std::shared_ptr m_recompilation_engine; @@ -994,17 +1109,14 @@ namespace ppu_recompiler_llvm { Executable GetExecutable(u32 address, Executable default_executable) const; /// Execute a function - static u64 ExecuteFunction(ExecutionEngine * execution_engine, PPUThread * ppu_state, PPUInterpreter * interpreter, Tracer * tracer); + static u32 ExecuteFunction(PPUThread * ppu_state, PPUInterpreter * interpreter, u64 context); /// Execute till the current function returns - static u64 ExecuteTillReturn(ExecutionEngine * execution_engine, PPUThread * ppu_state, PPUInterpreter * interpreter, Tracer * tracer); + static u32 ExecuteTillReturn(PPUThread * ppu_state, PPUInterpreter * interpreter, u64 context); }; /// Get the branch type from a branch instruction BranchType GetBranchTypeFromInstruction(u32 instruction); - - /// Get the execution trace id of an execution trace - ExecutionTraceId GetExecutionTraceId(const ExecutionTrace & execution_trace); } #endif // PPU_LLVM_RECOMPILER_H From 80294e1034195be8ce13894884690150e8793371 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sun, 9 Nov 2014 02:00:19 +0530 Subject: 
[PATCH 07/27] Fixed some bugs --- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 143 +++++++++++---------------- rpcs3/Emu/Cell/PPULLVMRecompiler.h | 22 +++-- rpcs3/rpcs3.vcxproj | 2 +- 3 files changed, 71 insertions(+), 96 deletions(-) diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 1805a9a095..11f83eb8ca 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -116,12 +116,12 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & // Convert each instruction in the CFG to LLVM IR std::vector exit_instr_list; for (auto instr_i = cfg.instruction_addresses.begin(); instr_i != cfg.instruction_addresses.end(); instr_i++) { + m_state.hit_branch_instruction = false; m_state.current_instruction_address = *instr_i; auto instr_bb = GetBasicBlockFromAddress(m_state.current_instruction_address); m_ir_builder->SetInsertPoint(instr_bb); - m_state.hit_branch_instruction = false; - if (!inline_all && instr_i != cfg.instruction_addresses.begin()) { + if (!inline_all && *instr_i != cfg.start_address) { // Use an already compiled implementation of this block if available auto ordinal = m_recompilation_engine.GetOrdinal(*instr_i); if (ordinal != 0xFFFFFFFF) { @@ -139,30 +139,18 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & switch_instr->addCase(m_ir_builder->getInt32(*next_instr_i), GetBasicBlockFromAddress(*next_instr_i)); } } - - m_state.hit_branch_instruction = true; } } - while (!m_state.hit_branch_instruction) { - if (!instr_bb->getInstList().empty()) { - break; - } - + if (instr_bb->empty()) { u32 instr = re32(vm::get_ref(m_state.current_instruction_address)); Decode(instr); - if (!m_state.hit_branch_instruction) { - m_state.current_instruction_address += 4; - instr_bb = GetBasicBlockFromAddress(m_state.current_instruction_address); - m_ir_builder->CreateBr(instr_bb); - m_ir_builder->SetInsertPoint(instr_bb); + 
m_ir_builder->CreateBr(GetBasicBlockFromAddress(m_state.current_instruction_address + 4)); } } } - m_recompilation_engine.Log() << *m_state.function; - // Generate exit logic for all empty blocks auto default_exit_block_name = GetBasicBlockNameFromAddress(0xFFFFFFFF); for (auto block_i = m_state.function->begin(); block_i != m_state.function->end(); block_i++) { @@ -171,8 +159,6 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & } // Found an empty block - m_recompilation_engine.Log() << "Empty block: " << block_i->getName() << "\n"; - m_ir_builder->SetInsertPoint(block_i); auto exit_instr_i32 = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 0); exit_instr_list.push_back(exit_instr_i32); @@ -202,10 +188,8 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & } } - m_recompilation_engine.Log() << *m_state.function; - // If the function has a default exit block then generate code for it - auto default_exit_bb = GetBasicBlockFromAddress(0xFFFFFFFF, false); + auto default_exit_bb = GetBasicBlockFromAddress(0xFFFFFFFF, "", false); if (default_exit_bb) { m_ir_builder->SetInsertPoint(default_exit_bb); auto exit_instr_i32 = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 0); @@ -213,8 +197,8 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & if (generate_linkable_exits) { auto cmp_i1 = m_ir_builder->CreateICmpNE(exit_instr_i32, m_ir_builder->getInt32(0)); - auto then_bb = GetBasicBlockFromAddress(0xFFFFFFFF, "then"); - auto merge_bb = GetBasicBlockFromAddress(0xFFFFFFFF, "merge"); + auto then_bb = GetBasicBlockFromAddress(0xFFFFFFFF, "then"); + auto merge_bb = GetBasicBlockFromAddress(0xFFFFFFFF, "merge"); m_ir_builder->CreateCondBr(cmp_i1, then_bb, merge_bb); m_ir_builder->SetInsertPoint(then_bb); @@ -239,6 +223,7 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & } } +#ifdef _DEBUG m_recompilation_engine.Log() << *m_state.function; 
std::string verify; @@ -246,6 +231,7 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & if (verifyFunction(*m_state.function, &verify_ostream)) { m_recompilation_engine.Log() << "Verification failed: " << verify << "\n"; } +#endif auto ir_build_end = std::chrono::high_resolution_clock::now(); m_stats.ir_build_time += std::chrono::duration_cast(ir_build_end - compilation_start); @@ -261,6 +247,20 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & auto translate_end = std::chrono::high_resolution_clock::now(); m_stats.translation_time += std::chrono::duration_cast(translate_end - optimize_end); +#ifdef _DEBUG + m_recompilation_engine.Log() << "\nDisassembly:\n"; + auto disassembler = LLVMCreateDisasm(sys::getProcessTriple().c_str(), nullptr, 0, nullptr, nullptr); + for (size_t pc = 0; pc < mci.size();) { + char str[1024]; + + auto size = LLVMDisasmInstruction(disassembler, ((u8 *)mci.address()) + pc, mci.size() - pc, (uint64_t)(((u8 *)mci.address()) + pc), str, sizeof(str)); + m_recompilation_engine.Log() << fmt::Format("0x%08X: ", (u64)(((u8 *)mci.address()) + pc)) << str << '\n'; + pc += size; + } + + LLVMDisasmDispose(disassembler); +#endif + auto compilation_end = std::chrono::high_resolution_clock::now(); m_stats.total_time += std::chrono::duration_cast(compilation_end - compilation_start); @@ -4191,15 +4191,24 @@ u32 Compiler::GetAddressFromBasicBlockName(const std::string & name) const { BasicBlock * Compiler::GetBasicBlockFromAddress(u32 address, const std::string & suffix, bool create_if_not_exist) { auto block_name = GetBasicBlockNameFromAddress(address, suffix); BasicBlock * block = nullptr; - for (auto i = m_state.function->getBasicBlockList().begin(); i != m_state.function->getBasicBlockList().end(); i++) { + BasicBlock * next_block = nullptr; + for (auto i = m_state.function->begin(); i != m_state.function->end(); i++) { if (i->getName() == block_name) { block = &(*i); break; } + +#ifdef 
_DEBUG + auto block_address = GetAddressFromBasicBlockName(i->getName()); + if (block_address > address) { + next_block = &(*i); + break; + } +#endif } if (!block && create_if_not_exist) { - block = BasicBlock::Create(m_ir_builder->getContext(), block_name, m_state.function); + block = BasicBlock::Create(m_ir_builder->getContext(), block_name, m_state.function, next_block); } return block; @@ -4688,22 +4697,10 @@ Value * Compiler::ReadMemory(Value * addr_i64, u32 bits, u32 alignment, bool bsw return val_ix; } else { - BasicBlock * next_block = nullptr; - for (auto i = m_state.function->begin(); i != m_state.function->end(); i++) { - if (&(*i) == m_ir_builder->GetInsertBlock()) { - i++; - if (i != m_state.function->end()) { - next_block = &(*i); - } - - break; - } - } - auto cmp_i1 = m_ir_builder->CreateICmpULT(addr_i64, m_ir_builder->getInt64(RAW_SPU_BASE_ADDR)); - auto then_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function, next_block); - auto else_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function, next_block); - auto merge_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function, next_block); + auto then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "then"); + auto else_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "else"); + auto merge_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "merge"); m_ir_builder->CreateCondBr(cmp_i1, then_bb, else_bb); m_ir_builder->SetInsertPoint(then_bb); @@ -4742,22 +4739,10 @@ void Compiler::WriteMemory(Value * addr_i64, Value * val_ix, u32 alignment, bool auto eaddr_ix_ptr = m_ir_builder->CreateIntToPtr(eaddr_i64, val_ix->getType()->getPointerTo()); m_ir_builder->CreateAlignedStore(val_ix, eaddr_ix_ptr, alignment); } else { - BasicBlock * next_block = nullptr; - for (auto i = m_state.function->begin(); i != m_state.function->end(); i++) { - if (&(*i) == m_ir_builder->GetInsertBlock()) { - i++; - if (i != 
m_state.function->end()) { - next_block = &(*i); - } - - break; - } - } - auto cmp_i1 = m_ir_builder->CreateICmpULT(addr_i64, m_ir_builder->getInt64(RAW_SPU_BASE_ADDR)); - auto then_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function, next_block); - auto else_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function, next_block); - auto merge_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_state.function, next_block); + auto then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "then"); + auto else_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "else"); + auto merge_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "merge"); m_ir_builder->CreateCondBr(cmp_i1, then_bb, else_bb); m_ir_builder->SetInsertPoint(then_bb); @@ -4837,9 +4822,11 @@ Value * Compiler::Call(const char * name, Func function, Args... args) { } llvm::Value * Compiler::IndirectCall(u32 address, Value * context_i64, bool is_function) { - auto ordinal = m_recompilation_engine.AllocateOrdinal(address, is_function); - auto executable_addr_i64 = m_ir_builder->getInt64(m_recompilation_engine.GetAddressOfExecutableLookup() + (ordinal * sizeof(u64))); - auto executable_ptr = m_ir_builder->CreateIntToPtr(executable_addr_i64, m_compiled_function_type); + auto ordinal = m_recompilation_engine.AllocateOrdinal(address, is_function); + auto location_i64 = m_ir_builder->getInt64(m_recompilation_engine.GetAddressOfExecutableLookup() + (ordinal * sizeof(u64))); + auto location_i64_ptr = m_ir_builder->CreateIntToPtr(location_i64, m_ir_builder->getInt64Ty()->getPointerTo()); + auto executable_i64 = m_ir_builder->CreateLoad(location_i64_ptr); + auto executable_ptr = m_ir_builder->CreateIntToPtr(executable_i64, m_compiled_function_type->getPointerTo()); return m_ir_builder->CreateCall3(executable_ptr, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], context_i64); } @@ 
-4970,30 +4957,12 @@ void RecompilationEngine::Task() { Log() << " Time spent translating = " << compiler_stats.translation_time.count() / 1000000 << "ms\n"; Log() << " Time spent idling = " << idling_time.count() / 1000000 << "ms\n"; Log() << " Time spent doing misc tasks = " << (total_time.count() - idling_time.count() - compiler_stats.total_time.count()) / 1000000 << "ms\n"; + Log() << "Ordinals allocated = " << m_next_ordinal << "\n"; Log() << "\nInterpreter fallback stats:\n"; for (auto i = compiler_stats.interpreter_fallback_stats.begin(); i != compiler_stats.interpreter_fallback_stats.end(); i++) { Log() << i->first << " = " << i->second << "\n"; } - //log_file << "\nDisassembly:\n"; - //auto disassembler = LLVMCreateDisasm(sys::getProcessTriple().c_str(), nullptr, 0, nullptr, nullptr); - //for (auto i = m_compiled.begin(); i != m_compiled.end(); i++) { - // log_file << fmt::Format("%s: Size = %u bytes, Number of instructions = %u\n", i->second.llvm_function->getName().str().c_str(), i->second.size, i->second.num_instructions); - - // uint8_t * fn_ptr = (uint8_t *)i->second.executable; - // for (size_t pc = 0; pc < i->second.size;) { - // char str[1024]; - - // auto size = LLVMDisasmInstruction(disassembler, fn_ptr + pc, i->second.size - pc, (uint64_t)(fn_ptr + pc), str, sizeof(str)); - // log_file << str << '\n'; - // pc += size; - // } - //} - - //LLVMDisasmDispose(disassembler); - - //log_file << "\nLLVM IR:\n" << *m_module; - LOG_NOTICE(PPU, "PPU LLVM Recompilation thread exiting."); s_the_instance = nullptr; // Can cause deadlock if this is the last instance. Need to fix this. 
} @@ -5002,7 +4971,9 @@ void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution auto execution_trace_id = execution_trace.GetId(); auto processed_execution_trace_i = m_processed_execution_traces.find(execution_trace_id); if (processed_execution_trace_i == m_processed_execution_traces.end()) { +#ifdef _DEBUG Log() << "Trace: " << execution_trace.ToString() << "\n"; +#endif std::vector tmp_block_list; @@ -5029,9 +5000,7 @@ void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution if (trace_i + 1 != execution_trace.entries.end()) { next_trace = &(*(trace_i + 1)); } else if (!split_trace && execution_trace.type == ExecutionTrace::Type::Loop) { - if (!split_trace && execution_trace.type == ExecutionTrace::Type::Loop) { - next_trace = &(*(execution_trace.entries.begin())); - } + next_trace = &(*(execution_trace.entries.begin())); } UpdateControlFlowGraph((*block_i)->cfg, *trace_i, next_trace); @@ -5043,7 +5012,7 @@ void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution for (auto i = processed_execution_trace_i->second.begin(); i != processed_execution_trace_i->second.end(); i++) { if (!(*i)->is_compiled) { (*i)->num_hits++; - if ((*i)->num_hits >= 1) { // TODO: Make this configurable + if ((*i)->num_hits >= 1000) { // TODO: Make this configurable CompileBlock(*(*i), false); (*i)->is_compiled = true; } @@ -5078,13 +5047,15 @@ void RecompilationEngine::UpdateControlFlowGraph(ControlFlowGraph & cfg, const E } void RecompilationEngine::CompileBlock(BlockEntry & block_entry, bool inline_referenced_blocks) { +#ifdef _DEBUG Log() << "Compile: " << block_entry.ToString() << "\n"; +#endif - auto is_funciton = block_entry.cfg.start_address == block_entry.cfg.function_address; - auto ordinal = AllocateOrdinal(block_entry.cfg.start_address, is_funciton); + auto is_function = block_entry.cfg.start_address == block_entry.cfg.function_address; + auto ordinal = AllocateOrdinal(block_entry.cfg.start_address, 
is_function); auto executable = m_compiler.Compile(fmt::Format("fn_0x%08X_%u", block_entry.cfg.start_address, block_entry.revision++), block_entry.cfg, - is_funciton ? false : true /*inline_all*/, - is_funciton ? true : false /*generate_linkable_exits*/); + is_function ? false : true /*inline_all*/, + is_function ? true : false /*generate_linkable_exits*/); m_executable_lookup[ordinal] = executable; } @@ -5254,7 +5225,7 @@ u32 ppu_recompiler_llvm::ExecutionEngine::ExecuteTillReturn(PPUThread * ppu_stat } auto executable = execution_engine->GetExecutable(ppu_state->PC, ExecuteTillReturn); - if (executable != ExecuteTillReturn) { + if (executable != ExecuteTillReturn && executable != ExecuteFunction) { auto entry = ppu_state->PC; auto exit = (u32)executable(ppu_state, interpreter, 0); execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledBlock, entry, exit); diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h index 963e22c8af..8c3fd6c699 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.h +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h @@ -240,20 +240,24 @@ namespace ppu_recompiler_llvm { } std::string ToString() const { - return fmt::Format("%s\nNumHits=%u, Revision=%u, IsCompiled=%c\n", cfg.ToString().c_str(), num_hits, revision, is_compiled ? 'Y' : 'N'); + return fmt::Format("%s\nNumHits=%u, Revision=%u, IsCompiled=%c", cfg.ToString().c_str(), num_hits, revision, is_compiled ? 
'Y' : 'N'); } bool operator == (const BlockEntry & other) const { return cfg.start_address == other.cfg.start_address; } - }; -} -namespace std { - template<> struct hash { - size_t operator()(const ppu_recompiler_llvm::BlockEntry * e) const { - return e->cfg.start_address; - } + struct hash { + size_t operator()(const BlockEntry * e) const { + return e->cfg.start_address; + } + }; + + struct equal_to { + bool operator()(const BlockEntry * lhs, const BlockEntry * rhs) const { + return *lhs == *rhs; + } + }; }; } @@ -978,7 +982,7 @@ namespace ppu_recompiler_llvm { std::list m_pending_execution_traces; /// Block table - std::unordered_set m_block_table; + std::unordered_set m_block_table; /// Execution traces that have been already encountered. Data is the list of all blocks that this trace includes. std::unordered_map> m_processed_execution_traces; diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj index d1c76abf35..c96bfad05e 100644 --- a/rpcs3/rpcs3.vcxproj +++ b/rpcs3/rpcs3.vcxproj @@ -87,7 +87,7 @@ true - $(SolutionDir)$(Platform)\$(Configuration)\emucore.lib;wxmsw31ud_adv.lib;wxbase31ud.lib;wxmsw31ud_core.lib;wxmsw31ud_aui.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;libOpenAL32.dll.a;asmjit.lib;LLVMJIT.lib;LLVMVectorize.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMExecutionEngine.lib;LLVMAsmPrinter.lib;LLVMSelectionDAG.lib;LLVMCodeGen.lib;LLVMScalarOpts.lib;LLVMInstCombine.lib;LLVMTransformUtils.lib;LLVMipa.lib;LLVMAnalysis.lib;LLVMTarget.lib;LLVMX86Desc.lib;LLVMX86AsmPrinter.lib;LLVMObject.lib;LLVMMCParser.lib;LLVMBitReader.lib;LLVMCore.lib;LLVMX86Utils.lib;LLVMMC.lib;LLVMX86Info.lib;LLVMSupport.lib; ;%(AdditionalDependencies) + 
$(SolutionDir)$(Platform)\$(Configuration)\emucore.lib;wxmsw31ud_adv.lib;wxbase31ud.lib;wxmsw31ud_core.lib;wxmsw31ud_aui.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;libOpenAL32.dll.a;asmjit.lib;LLVMJIT.lib;LLVMVectorize.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMExecutionEngine.lib;LLVMAsmPrinter.lib;LLVMSelectionDAG.lib;LLVMCodeGen.lib;LLVMScalarOpts.lib;LLVMInstCombine.lib;LLVMTransformUtils.lib;LLVMipa.lib;LLVMAnalysis.lib;LLVMTarget.lib;LLVMX86Desc.lib;LLVMX86AsmPrinter.lib;LLVMObject.lib;LLVMMCParser.lib;LLVMBitReader.lib;LLVMCore.lib;LLVMX86Utils.lib;LLVMMC.lib;LLVMX86Info.lib;LLVMSupport.lib; LLVMMCDisassembler.lib;%(AdditionalDependencies) %(IgnoreSpecificDefaultLibraries) false ..\wxWidgets\lib\vc_x64_lib;..\ffmpeg\Windows\x86_64\lib;..\OpenAL\Win64;..\llvm_build\Debug\lib From 5c468d7591f60f719bcb60b8be4f23e7e16be103 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sun, 9 Nov 2014 11:50:01 +0530 Subject: [PATCH 08/27] Disabled call stack tracing when using the PPU LLVM recompiler --- rpcs3/Emu/CPU/CPUThread.cpp | 3 ++- rpcs3/Emu/CPU/CPUThread.h | 4 ++++ rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 2 +- rpcs3/Emu/Cell/PPUThread.cpp | 1 + 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp index fea5e15400..5990228a29 100644 --- a/rpcs3/Emu/CPU/CPUThread.cpp +++ b/rpcs3/Emu/CPU/CPUThread.cpp @@ -29,6 +29,7 @@ CPUThread::CPUThread(CPUThreadType type) , m_status(Stopped) , m_last_syscall(0) , m_trace_enabled(false) + , m_trace_call_stack(true) { } @@ -155,7 +156,7 @@ void CPUThread::SetBranch(const u32 pc, bool record_branch) m_is_branch = true; nPC = pc; - if(record_branch) + if(m_trace_call_stack && record_branch) 
CallStackBranch(pc); } diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h index d15af1431d..35d48dd990 100644 --- a/rpcs3/Emu/CPU/CPUThread.h +++ b/rpcs3/Emu/CPU/CPUThread.h @@ -43,6 +43,8 @@ protected: CPUDecoder* m_dec; + bool m_trace_call_stack; + public: virtual void InitRegs()=0; @@ -177,6 +179,8 @@ public: u32 GetId() const { return m_id; } CPUThreadType GetType() const { return m_type; } + void SetCallStackTracing(bool trace_call_stack) { m_trace_call_stack = trace_call_stack; } + void Reset(); void Close(); void Run(); diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 11f83eb8ca..eb7ad0fa8d 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -237,7 +237,7 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & m_stats.ir_build_time += std::chrono::duration_cast(ir_build_end - compilation_start); // Optimize this function - //m_fpm->run(*m_state.function); + m_fpm->run(*m_state.function); auto optimize_end = std::chrono::high_resolution_clock::now(); m_stats.optimization_time += std::chrono::duration_cast(optimize_end - ir_build_end); diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index df7ce2740b..a74a64fa2d 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -111,6 +111,7 @@ void PPUThread::DoRun() break; case 2: + SetCallStackTracing(false); if (!m_dec) { m_dec = new ppu_recompiler_llvm::ExecutionEngine(*this); } From 71c12360c7ef8232cf157680cf08046077ef10a3 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sun, 9 Nov 2014 14:03:36 +0530 Subject: [PATCH 09/27] Always inline functions --- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 41 +++++++++++++++++++++++----- rpcs3/Emu/Cell/PPULLVMRecompiler.h | 40 +++++++++++++++++++++++++-- 2 files changed, 72 insertions(+), 9 deletions(-) diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 
eb7ad0fa8d..5e48bb5548 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -4991,6 +4991,17 @@ void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution block_i = m_block_table.find(&key); if (block_i == m_block_table.end()) { block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.cfg.start_address, key.cfg.function_address)); + + // Update the function to block map + auto function_to_block_i = m_function_to_blocks.find(execution_trace.function_address); + if (function_to_block_i == m_function_to_blocks.end()) { + function_to_block_i = m_function_to_blocks.insert(m_function_to_blocks.end(), std::make_pair(execution_trace.function_address, std::vector())); + } + + auto i = std::find(function_to_block_i->second.begin(), function_to_block_i->second.end(), *block_i); + if (i == function_to_block_i->second.end()) { + function_to_block_i->second.push_back(*block_i); + } } tmp_block_list.push_back(*block_i); @@ -5013,7 +5024,7 @@ void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution if (!(*i)->is_compiled) { (*i)->num_hits++; if ((*i)->num_hits >= 1000) { // TODO: Make this configurable - CompileBlock(*(*i), false); + CompileBlock(*(*i)); (*i)->is_compiled = true; } } @@ -5046,16 +5057,32 @@ void RecompilationEngine::UpdateControlFlowGraph(ControlFlowGraph & cfg, const E } } -void RecompilationEngine::CompileBlock(BlockEntry & block_entry, bool inline_referenced_blocks) { +void RecompilationEngine::CompileBlock(BlockEntry & block_entry) { #ifdef _DEBUG Log() << "Compile: " << block_entry.ToString() << "\n"; #endif - auto is_function = block_entry.cfg.start_address == block_entry.cfg.function_address; - auto ordinal = AllocateOrdinal(block_entry.cfg.start_address, is_function); - auto executable = m_compiler.Compile(fmt::Format("fn_0x%08X_%u", block_entry.cfg.start_address, block_entry.revision++), block_entry.cfg, - is_function ? 
false : true /*inline_all*/, - is_function ? true : false /*generate_linkable_exits*/); + ControlFlowGraph * cfg; + ControlFlowGraph temp_cfg(block_entry.cfg.start_address, block_entry.cfg.function_address); + if (block_entry.IsFunction()) { + // Form a CFG by merging all the blocks in this function + auto function_to_block_i = m_function_to_blocks.find(block_entry.cfg.function_address); + for (auto block_i = function_to_block_i->second.begin(); block_i != function_to_block_i->second.end(); block_i++) { + temp_cfg += (*block_i)->cfg; + } + + cfg = &temp_cfg; + } else { + cfg = &block_entry.cfg; + } + +#ifdef _DEBUG + Log() << "CFG: " << cfg->ToString() << "\n"; +#endif + + auto ordinal = AllocateOrdinal(block_entry.cfg.start_address, block_entry.IsFunction()); + auto executable = m_compiler.Compile(fmt::Format("fn_0x%08X_%u", block_entry.cfg.start_address, block_entry.revision++), *cfg, true, + block_entry.IsFunction() ? true : false /*generate_linkable_exits*/); m_executable_lookup[ordinal] = executable; } diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h index 8c3fd6c699..20523aa5a4 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.h +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h @@ -182,6 +182,34 @@ namespace ppu_recompiler_llvm { , function_address(function_address) { } + void operator += (const ControlFlowGraph & other) { + for (auto i = other.instruction_addresses.begin(); i != other.instruction_addresses.end(); i++) { + instruction_addresses.insert(*i); + } + + for (auto i = other.branches.begin(); i != other.branches.end(); i++) { + auto j = branches.find(i->first); + if (j == branches.end()) { + j = branches.insert(branches.begin(), std::make_pair(i->first, std::set())); + } + + for (auto k = i->second.begin(); k != i->second.end(); k++) { + j->second.insert(*k); + } + } + + for (auto i = other.calls.begin(); i != other.calls.end(); i++) { + auto j = calls.find(i->first); + if (j == calls.end()) { + j = calls.insert(calls.begin(), 
std::make_pair(i->first, std::set())); + } + + for (auto k = i->second.begin(); k != i->second.end(); k++) { + j->second.insert(*k); + } + } + } + std::string ToString() const { auto s = fmt::Format("0x%08X (0x%08X):", start_address, function_address); for (auto i = instruction_addresses.begin(); i != instruction_addresses.end(); i++) { @@ -240,13 +268,18 @@ namespace ppu_recompiler_llvm { } std::string ToString() const { - return fmt::Format("%s\nNumHits=%u, Revision=%u, IsCompiled=%c", cfg.ToString().c_str(), num_hits, revision, is_compiled ? 'Y' : 'N'); + return fmt::Format("0x%08X (0x%08X): NumHits=%u, Revision=%u, IsCompiled=%c", + cfg.start_address, cfg.function_address, num_hits, revision, is_compiled ? 'Y' : 'N'); } bool operator == (const BlockEntry & other) const { return cfg.start_address == other.cfg.start_address; } + bool IsFunction() const { + return cfg.function_address == cfg.start_address; + } + struct hash { size_t operator()(const BlockEntry * e) const { return e->cfg.start_address; @@ -984,6 +1017,9 @@ namespace ppu_recompiler_llvm { /// Block table std::unordered_set m_block_table; + /// Maps a function to the set of all blocks in the function. Key is the address of the function. + std::unordered_map> m_function_to_blocks; + /// Execution traces that have been already encountered. Data is the list of all blocks that this trace includes. 
std::unordered_map> m_processed_execution_traces; @@ -1021,7 +1057,7 @@ namespace ppu_recompiler_llvm { void UpdateControlFlowGraph(ControlFlowGraph & cfg, const ExecutionTraceEntry & this_entry, const ExecutionTraceEntry * next_entry); /// Compile a block - void CompileBlock(BlockEntry & block_entry, bool inline_referenced_blocks); + void CompileBlock(BlockEntry & block_entry); /// Mutex used to prevent multiple creation static std::mutex s_mutex; From c12a98510c8efb435ec17fce2b726a70f5c1ef5c Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Mon, 10 Nov 2014 01:04:21 +0530 Subject: [PATCH 10/27] Utilize idle time to combine blocks --- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 114 +++++++++++++------- rpcs3/Emu/Cell/PPULLVMRecompiler.h | 151 ++++++++++++++++++--------- 2 files changed, 177 insertions(+), 88 deletions(-) diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 5e48bb5548..afcfc66f15 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -4908,41 +4908,70 @@ raw_fd_ostream & RecompilationEngine::Log() { } void RecompilationEngine::Task() { + bool work_done_this_iteration = false; + bool work_done_last_iteration = false; std::chrono::nanoseconds idling_time(0); + std::chrono::nanoseconds recompiling_time(0); auto start = std::chrono::high_resolution_clock::now(); while (!TestDestroy() && !Emu.IsStopped()) { - // Wait a few ms for something to happen - auto idling_start = std::chrono::high_resolution_clock::now(); - WaitForAnySignal(250); - auto idling_end = std::chrono::high_resolution_clock::now(); - idling_time += std::chrono::duration_cast(idling_end - idling_start); + work_done_last_iteration = work_done_this_iteration; + work_done_this_iteration = false; + ExecutionTrace * execution_trace = nullptr; - u32 num_processed = 0; - while (!TestDestroy() && !Emu.IsStopped()) { - ExecutionTrace * execution_trace; + { + std::lock_guard lock(m_pending_execution_traces_lock); - 
{ - std::lock_guard lock(m_pending_execution_traces_lock); + auto i = m_pending_execution_traces.begin(); + if (i != m_pending_execution_traces.end()) { + execution_trace = *i; + m_pending_execution_traces.erase(i); + } + } - auto i = m_pending_execution_traces.begin(); - if (i != m_pending_execution_traces.end()) { - execution_trace = *i; - m_pending_execution_traces.erase(i); - } else { - break; + if (execution_trace) { + ProcessExecutionTrace(*execution_trace); + delete execution_trace; + work_done_this_iteration = true; + } + + if (!work_done_this_iteration) { + // TODO: Reduce the priority of the recompilation engine thread if its set to high priority + } + + if (!work_done_this_iteration && !work_done_last_iteration) { + auto recompiling_start = std::chrono::high_resolution_clock::now(); + + // Recompile the function with the most number of compiled fragments + auto candidate = m_function_table.end(); + for (auto function_i = m_function_table.begin(); function_i != m_function_table.end(); function_i++) { + if ((*function_i)->num_compiled_fragments && (*function_i)->blocks.front()->IsFunction() && (*function_i)->blocks.front()->is_compiled) { + if (candidate != m_function_table.end()) { + if ((*function_i)->num_compiled_fragments > (*candidate)->num_compiled_fragments) { + candidate = function_i; + } + } else { + candidate = function_i; + } } } - ProcessExecutionTrace(*execution_trace); - delete execution_trace; + if (candidate != m_function_table.end()) { + Log() << "Recompiling: " << (*candidate)->ToString() << "\n"; + CompileBlock(*(*candidate), *((*candidate)->blocks.front())); + work_done_this_iteration = true; + } + + auto recompiling_end = std::chrono::high_resolution_clock::now(); + recompiling_time += std::chrono::duration_cast(recompiling_end - recompiling_start); } - // TODO: Reduce the priority of the recompilation engine thread - - if (num_processed == 0) { - // If we get here, it means the recompilation engine is idling. 
- // We should use this oppurtunity to optimize the code. + if (!work_done_this_iteration) { + // Wait a few ms for something to happen + auto idling_start = std::chrono::high_resolution_clock::now(); + WaitForAnySignal(250); + auto idling_end = std::chrono::high_resolution_clock::now(); + idling_time += std::chrono::duration_cast(idling_end - idling_start); } } @@ -4955,6 +4984,7 @@ void RecompilationEngine::Task() { Log() << " Time spent building IR = " << compiler_stats.ir_build_time.count() / 1000000 << "ms\n"; Log() << " Time spent optimizing = " << compiler_stats.optimization_time.count() / 1000000 << "ms\n"; Log() << " Time spent translating = " << compiler_stats.translation_time.count() / 1000000 << "ms\n"; + Log() << " Time spent recompiling = " << recompiling_time.count() / 1000000 << "ms\n"; Log() << " Time spent idling = " << idling_time.count() / 1000000 << "ms\n"; Log() << " Time spent doing misc tasks = " << (total_time.count() - idling_time.count() - compiler_stats.total_time.count()) / 1000000 << "ms\n"; Log() << "Ordinals allocated = " << m_next_ordinal << "\n"; @@ -4968,6 +4998,8 @@ void RecompilationEngine::Task() { } void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution_trace) { + auto function_i = m_function_table.end(); + auto execution_trace_id = execution_trace.GetId(); auto processed_execution_trace_i = m_processed_execution_traces.find(execution_trace_id); if (processed_execution_trace_i == m_processed_execution_traces.end()) { @@ -4992,16 +5024,16 @@ void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution if (block_i == m_block_table.end()) { block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.cfg.start_address, key.cfg.function_address)); - // Update the function to block map - auto function_to_block_i = m_function_to_blocks.find(execution_trace.function_address); - if (function_to_block_i == m_function_to_blocks.end()) { - function_to_block_i = 
m_function_to_blocks.insert(m_function_to_blocks.end(), std::make_pair(execution_trace.function_address, std::vector())); + if (function_i == m_function_table.end()) { + FunctionEntry key(execution_trace.function_address); + function_i = m_function_table.find(&key); + if (function_i == m_function_table.end()) { + function_i = m_function_table.insert(m_function_table.end(), new FunctionEntry(key.address)); + } } - auto i = std::find(function_to_block_i->second.begin(), function_to_block_i->second.end(), *block_i); - if (i == function_to_block_i->second.end()) { - function_to_block_i->second.push_back(*block_i); - } + // Update the function table + (*function_i)->AddBlock(*block_i); } tmp_block_list.push_back(*block_i); @@ -5024,7 +5056,12 @@ void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution if (!(*i)->is_compiled) { (*i)->num_hits++; if ((*i)->num_hits >= 1000) { // TODO: Make this configurable - CompileBlock(*(*i)); + if (function_i == m_function_table.end()) { + FunctionEntry key(execution_trace.function_address); + function_i = m_function_table.find(&key); + } + + CompileBlock(*(*function_i), *(*i)); (*i)->is_compiled = true; } } @@ -5057,17 +5094,16 @@ void RecompilationEngine::UpdateControlFlowGraph(ControlFlowGraph & cfg, const E } } -void RecompilationEngine::CompileBlock(BlockEntry & block_entry) { +void RecompilationEngine::CompileBlock(FunctionEntry & function_entry, BlockEntry & block_entry) { #ifdef _DEBUG Log() << "Compile: " << block_entry.ToString() << "\n"; #endif - ControlFlowGraph * cfg; ControlFlowGraph temp_cfg(block_entry.cfg.start_address, block_entry.cfg.function_address); + ControlFlowGraph * cfg; if (block_entry.IsFunction()) { // Form a CFG by merging all the blocks in this function - auto function_to_block_i = m_function_to_blocks.find(block_entry.cfg.function_address); - for (auto block_i = function_to_block_i->second.begin(); block_i != function_to_block_i->second.end(); block_i++) { + for (auto block_i = 
function_entry.blocks.begin(); block_i != function_entry.blocks.end(); block_i++) { temp_cfg += (*block_i)->cfg; } @@ -5084,6 +5120,12 @@ void RecompilationEngine::CompileBlock(BlockEntry & block_entry) { auto executable = m_compiler.Compile(fmt::Format("fn_0x%08X_%u", block_entry.cfg.start_address, block_entry.revision++), *cfg, true, block_entry.IsFunction() ? true : false /*generate_linkable_exits*/); m_executable_lookup[ordinal] = executable; + + if (block_entry.IsFunction()) { + function_entry.num_compiled_fragments = 0; + } else { + function_entry.num_compiled_fragments++; + } } std::shared_ptr RecompilationEngine::GetInstance() { diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h index 20523aa5a4..c5de00940e 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.h +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h @@ -246,55 +246,6 @@ namespace ppu_recompiler_llvm { /// Pointer to an executable typedef u32(*Executable)(PPUThread * ppu_state, PPUInterpreter * interpreter, u64 context); - /// An entry in the block table - struct BlockEntry { - /// Number of times this block was hit - u32 num_hits; - - /// The current revision number of this function - u32 revision; - - /// The CFG for this block - ControlFlowGraph cfg; - - /// Indicates whether the block has been compiled or not - bool is_compiled; - - BlockEntry(u32 start_address, u32 function_address) - : num_hits(0) - , revision(0) - , is_compiled(false) - , cfg(start_address, function_address) { - } - - std::string ToString() const { - return fmt::Format("0x%08X (0x%08X): NumHits=%u, Revision=%u, IsCompiled=%c", - cfg.start_address, cfg.function_address, num_hits, revision, is_compiled ? 
'Y' : 'N'); - } - - bool operator == (const BlockEntry & other) const { - return cfg.start_address == other.cfg.start_address; - } - - bool IsFunction() const { - return cfg.function_address == cfg.start_address; - } - - struct hash { - size_t operator()(const BlockEntry * e) const { - return e->cfg.start_address; - } - }; - - struct equal_to { - bool operator()(const BlockEntry * lhs, const BlockEntry * rhs) const { - return *lhs == *rhs; - } - }; - }; -} - -namespace ppu_recompiler_llvm { /// PPU compiler that uses LLVM for code generation and optimization class Compiler : protected PPUOpcodes, protected PPCDecoder { public: @@ -1008,6 +959,102 @@ namespace ppu_recompiler_llvm { static std::shared_ptr GetInstance(); private: + /// An entry in the block table + struct BlockEntry { + /// Number of times this block was hit + u32 num_hits; + + /// The current revision number of this function + u32 revision; + + /// The CFG for this block + ControlFlowGraph cfg; + + /// Indicates whether the block has been compiled or not + bool is_compiled; + + BlockEntry(u32 start_address, u32 function_address) + : num_hits(0) + , revision(0) + , is_compiled(false) + , cfg(start_address, function_address) { + } + + std::string ToString() const { + return fmt::Format("0x%08X (0x%08X): NumHits=%u, Revision=%u, IsCompiled=%c", + cfg.start_address, cfg.function_address, num_hits, revision, is_compiled ? 
'Y' : 'N'); + } + + bool operator == (const BlockEntry & other) const { + return cfg.start_address == other.cfg.start_address; + } + + bool IsFunction() const { + return cfg.function_address == cfg.start_address; + } + + struct hash { + size_t operator()(const BlockEntry * e) const { + return e->cfg.start_address; + } + }; + + struct equal_to { + bool operator()(const BlockEntry * lhs, const BlockEntry * rhs) const { + return *lhs == *rhs; + } + }; + }; + + /// An entry in the function table + struct FunctionEntry { + /// Address of the function + u32 address; + + /// Number of compiled fragments + u32 num_compiled_fragments; + + /// Blocks in the function + std::list blocks; + + FunctionEntry(u32 address) + : address(address) + , num_compiled_fragments(0) { + } + + void AddBlock(BlockEntry * block_entry) { + auto i = std::find(blocks.begin(), blocks.end(), block_entry); + if (i == blocks.end()) { + if (block_entry->IsFunction()) { + // The first block must be the starting block of the function + blocks.push_front(block_entry); + } else { + blocks.push_back(block_entry); + } + } + } + + std::string ToString() const { + return fmt::Format("0x%08X: NumCompiledFragments=%u, NumBlocks=%u", address, num_compiled_fragments, blocks.size()); + } + + bool operator == (const FunctionEntry & other) const { + return address == other.address; + } + + struct hash { + size_t operator()(const FunctionEntry * f) const { + return f->address; + } + }; + + struct equal_to { + bool operator()(const FunctionEntry * lhs, const FunctionEntry * rhs) const { + return *lhs == *rhs; + } + }; + }; + /// Lock for accessing m_pending_execution_traces. TODO: Eliminate this and use a lock-free queue. std::mutex m_pending_execution_traces_lock; @@ -1017,8 +1064,8 @@ namespace ppu_recompiler_llvm { /// Block table std::unordered_set m_block_table; - /// Maps a function to the set of all blocks in the function. Key is the address of the function. 
- std::unordered_map> m_function_to_blocks; + /// Function table + std::unordered_set m_function_table; /// Execution traces that have been already encountered. Data is the list of all blocks that this trace includes. std::unordered_map> m_processed_execution_traces; @@ -1057,7 +1104,7 @@ namespace ppu_recompiler_llvm { void UpdateControlFlowGraph(ControlFlowGraph & cfg, const ExecutionTraceEntry & this_entry, const ExecutionTraceEntry * next_entry); /// Compile a block - void CompileBlock(BlockEntry & block_entry); + void CompileBlock(FunctionEntry & function_entry, BlockEntry & block_entry); /// Mutex used to prevent multiple creation static std::mutex s_mutex; From 87accc624ff6513f90f5bedc3aeb1f10fef1c9a1 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Mon, 10 Nov 2014 12:49:48 +0530 Subject: [PATCH 11/27] Fixed some errors thrown by gcc/clang --- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 10 +++++----- rpcs3/Emu/Cell/PPULLVMRecompiler.h | 30 ++++++++++++++-------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 85d616d633..0ed3301b29 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -5094,15 +5094,15 @@ void RecompilationEngine::UpdateControlFlowGraph(ControlFlowGraph & cfg, const E cfg.branches[this_entry.GetPrimaryAddress()].insert(next_entry->GetPrimaryAddress()); } } else if (next_entry->type == ExecutionTraceEntry::Type::FunctionCall) { - cfg.calls[this_entry.instruction.address].insert(next_entry->GetPrimaryAddress()); + cfg.calls[this_entry.data.instruction.address].insert(next_entry->GetPrimaryAddress()); } } } else if (this_entry.type == ExecutionTraceEntry::Type::CompiledBlock) { if (next_entry) { if (next_entry->type == ExecutionTraceEntry::Type::Instruction || next_entry->type == ExecutionTraceEntry::Type::CompiledBlock) { - 
cfg.branches[this_entry.compiled_block.exit_address].insert(next_entry->GetPrimaryAddress()); + cfg.branches[this_entry.data.compiled_block.exit_address].insert(next_entry->GetPrimaryAddress()); } else if (next_entry->type == ExecutionTraceEntry::Type::FunctionCall) { - cfg.calls[this_entry.compiled_block.exit_address].insert(next_entry->GetPrimaryAddress()); + cfg.calls[this_entry.data.compiled_block.exit_address].insert(next_entry->GetPrimaryAddress()); } } } @@ -5190,8 +5190,8 @@ void Tracer::Trace(TraceType trace_type, u32 arg1, u32 arg2) { case TraceType::Instruction: // arg1 is the address of the instruction for (int i = (int)m_stack.back()->entries.size() - 1; i >= 0; i--) { - if ((m_stack.back()->entries[i].type == ExecutionTraceEntry::Type::Instruction && m_stack.back()->entries[i].instruction.address == arg1) || - (m_stack.back()->entries[i].type == ExecutionTraceEntry::Type::CompiledBlock && m_stack.back()->entries[i].compiled_block.entry_address == arg1)) { + if ((m_stack.back()->entries[i].type == ExecutionTraceEntry::Type::Instruction && m_stack.back()->entries[i].data.instruction.address == arg1) || + (m_stack.back()->entries[i].type == ExecutionTraceEntry::Type::CompiledBlock && m_stack.back()->entries[i].data.compiled_block.entry_address == arg1)) { // Found a loop execution_trace = new ExecutionTrace(m_stack.back()->function_address); execution_trace->type = ExecutionTrace::Type::Loop; diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h index 680a8988df..edee34d2dd 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.h +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h @@ -38,7 +38,7 @@ namespace ppu_recompiler_llvm { u32 entry_address; u32 exit_address; } compiled_block; - }; + } data; /// The type of the entry enum class Type { @@ -51,14 +51,14 @@ namespace ppu_recompiler_llvm { : type(type) { switch (type) { case Type::Instruction: - instruction.address = arg1; + data.instruction.address = arg1; break; case Type::FunctionCall: - 
function_call.address = arg1; + data.function_call.address = arg1; break; case Type::CompiledBlock: - compiled_block.entry_address = arg1; - compiled_block.exit_address = arg2; + data.compiled_block.entry_address = arg1; + data.compiled_block.exit_address = arg2; break; default: assert(0); @@ -69,11 +69,11 @@ namespace ppu_recompiler_llvm { u32 GetPrimaryAddress() const { switch (type) { case Type::Instruction: - return instruction.address; + return data.instruction.address; case Type::FunctionCall: - return function_call.address; + return data.function_call.address; case Type::CompiledBlock: - return compiled_block.entry_address; + return data.compiled_block.entry_address; default: assert(0); return 0; @@ -83,11 +83,11 @@ namespace ppu_recompiler_llvm { std::string ToString() const { switch (type) { case Type::Instruction: - return fmt::Format("I:0x%08X", instruction.address); + return fmt::Format("I:0x%08X", data.instruction.address); case Type::FunctionCall: - return fmt::Format("F:0x%08X", function_call.address); + return fmt::Format("F:0x%08X", data.function_call.address); case Type::CompiledBlock: - return fmt::Format("C:0x%08X-0x%08X", compiled_block.entry_address, compiled_block.exit_address); + return fmt::Format("C:0x%08X-0x%08X", data.compiled_block.entry_address, data.compiled_block.exit_address); default: assert(0); return ""; @@ -98,15 +98,15 @@ namespace ppu_recompiler_llvm { u64 hash = ((u64)type << 32); switch (type) { case Type::Instruction: - hash |= instruction.address; + hash |= data.instruction.address; break; case Type::FunctionCall: - hash |= function_call.address; + hash |= data.function_call.address; break; case Type::CompiledBlock: - hash = compiled_block.exit_address; + hash = data.compiled_block.exit_address; hash <<= 32; - hash |= compiled_block.entry_address; + hash |= data.compiled_block.entry_address; break; default: assert(0); From 1568d2d602b8febb0b78740166eff3594e157018 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Mon, 
10 Nov 2014 14:32:41 +0530 Subject: [PATCH 12/27] Fixed some more gcc/clang compilation errors --- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 29 ++++++++++++++-------------- rpcs3/Emu/Cell/PPULLVMRecompiler.h | 12 ++++++------ 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 0ed3301b29..aad5ca6d0f 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -26,7 +26,7 @@ using namespace ppu_recompiler_llvm; u64 Compiler::s_rotate_mask[64][64]; bool Compiler::s_rotate_mask_inited = false; -Compiler::Compiler(RecompilationEngine & recompilation_engine, const Executable unknown_function, const Executable unknown_block) +Compiler::Compiler(RecompilationEngine & recompilation_engine, const Executable execute_unknown_function, const Executable execute_unknown_block) : m_recompilation_engine(recompilation_engine) { InitializeNativeTarget(); InitializeNativeTargetAsmPrinter(); @@ -70,13 +70,13 @@ Compiler::Compiler(RecompilationEngine & recompilation_engine, const Executable arg_types.push_back(m_ir_builder->getInt64Ty()); m_compiled_function_type = FunctionType::get(m_ir_builder->getInt32Ty(), arg_types, false); - m_unknown_function = (Function *)m_module->getOrInsertFunction("unknown_function", m_compiled_function_type); - m_unknown_function->setCallingConv(CallingConv::X86_64_Win64); - m_execution_engine->addGlobalMapping(m_unknown_function, unknown_function); + m_execute_unknown_function = (Function *)m_module->getOrInsertFunction("execute_unknown_function", m_compiled_function_type); + m_execute_unknown_function->setCallingConv(CallingConv::X86_64_Win64); + m_execution_engine->addGlobalMapping(m_execute_unknown_function, (void *)execute_unknown_function); - m_unknown_block = (Function *)m_module->getOrInsertFunction("unknown_block", m_compiled_function_type); - m_unknown_block->setCallingConv(CallingConv::X86_64_Win64); - 
m_execution_engine->addGlobalMapping(m_unknown_block, unknown_block); + m_execute_unknown_block = (Function *)m_module->getOrInsertFunction("execute_unknown_block", m_compiled_function_type); + m_execute_unknown_block->setCallingConv(CallingConv::X86_64_Win64); + m_execution_engine->addGlobalMapping(m_execute_unknown_block, (void *)execute_unknown_block); if (!s_rotate_mask_inited) { InitRotateMask(); @@ -178,7 +178,7 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & m_ir_builder->SetInsertPoint(then_bb); context_i64 = m_ir_builder->CreateZExt(ret_i32, m_ir_builder->getInt64Ty()); context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); - m_ir_builder->CreateCall3(m_unknown_block, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], context_i64); + m_ir_builder->CreateCall3(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], context_i64); m_ir_builder->CreateBr(merge_bb); m_ir_builder->SetInsertPoint(merge_bb); @@ -204,7 +204,7 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & m_ir_builder->SetInsertPoint(then_bb); auto context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty()); context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); - m_ir_builder->CreateCall3(m_unknown_block, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], context_i64); + m_ir_builder->CreateCall3(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], context_i64); m_ir_builder->CreateBr(merge_bb); m_ir_builder->SetInsertPoint(merge_bb); @@ -4660,7 +4660,7 @@ void Compiler::CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i32, bool auto switch_instr = m_ir_builder->CreateSwitch(target_i32, unknown_function_block); 
m_ir_builder->SetInsertPoint(unknown_function_block); - m_ir_builder->CreateCall3(m_unknown_function, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], m_ir_builder->getInt64(0)); + m_ir_builder->CreateCall3(m_execute_unknown_function, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], m_ir_builder->getInt64(0)); m_ir_builder->CreateBr(next_block); auto call_i = m_state.cfg->calls.find(m_state.current_instruction_address); @@ -4859,9 +4859,10 @@ std::shared_ptr RecompilationEngine::s_the_instance = nullp RecompilationEngine::RecompilationEngine() : ThreadBase("PPU Recompilation Engine") , m_next_ordinal(0) - , m_compiler(*this, ExecutionEngine::ExecuteFunction, ExecutionEngine::ExecuteTillReturn) - , m_log("PPULLVMRecompiler.log", std::string(), sys::fs::F_Text) { - m_log.SetUnbuffered(); + , m_compiler(*this, ExecutionEngine::ExecuteFunction, ExecutionEngine::ExecuteTillReturn) { + std::string error; + m_log = new raw_fd_ostream("PPULLVMRecompiler.log", error, sys::fs::F_Text); + m_log->SetUnbuffered(); } RecompilationEngine::~RecompilationEngine() { @@ -4918,7 +4919,7 @@ void RecompilationEngine::NotifyTrace(ExecutionTrace * execution_trace) { } raw_fd_ostream & RecompilationEngine::Log() { - return m_log; + return *m_log; } void RecompilationEngine::Task() { diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h index edee34d2dd..0da0c1835e 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.h +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h @@ -269,7 +269,7 @@ namespace ppu_recompiler_llvm { std::map interpreter_fallback_stats; }; - Compiler(RecompilationEngine & recompilation_engine, const Executable unknown_function, const Executable unknown_block); + Compiler(RecompilationEngine & recompilation_engine, const Executable execute_unknown_function, const Executable execute_unknown_block); Compiler(const Compiler & other) = delete; Compiler(Compiler && 
other) = delete; @@ -731,11 +731,11 @@ namespace ppu_recompiler_llvm { /// Recompilation engine RecompilationEngine & m_recompilation_engine; - /// The function that will be called to process unknown functions - llvm::Function * m_unknown_function; + /// The function that will be called to execute unknown functions + llvm::Function * m_execute_unknown_function; - /// The executable that will be called to process unknown blocks - llvm::Function * m_unknown_block; + /// The executable that will be called to execute unknown blocks + llvm::Function * m_execute_unknown_block; /// LLVM context llvm::LLVMContext * m_llvm_context; @@ -1087,7 +1087,7 @@ namespace ppu_recompiler_llvm { Compiler m_compiler; /// Log - llvm::raw_fd_ostream m_log; + llvm::raw_fd_ostream * m_log; /// Executable lookup table Executable m_executable_lookup[10000]; // TODO: Adjust size From 4185c1e42298a8b88b2d7d1ab95af0404f05224c Mon Sep 17 00:00:00 2001 From: Fabian Schaffert Date: Wed, 12 Nov 2014 23:25:27 +0100 Subject: [PATCH 13/27] Fixes segfault described in #794 Fixes bug in sys_semaphore_create() when a NULL pointer address is passed in sem or attr. Fixes bug in sys_semaphore_get_value() when a NULL pointer address is passed in count. 
--- rpcs3/Emu/SysCalls/lv2/sys_semaphore.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/rpcs3/Emu/SysCalls/lv2/sys_semaphore.cpp b/rpcs3/Emu/SysCalls/lv2/sys_semaphore.cpp index c7dc32e5c3..e5fd7463b5 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_semaphore.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_semaphore.cpp @@ -26,6 +26,16 @@ s32 sys_semaphore_create(vm::ptr sem, vm::ptr attr sys_semaphore.Warning("sys_semaphore_create(sem_addr=0x%x, attr_addr=0x%x, initial_count=%d, max_count=%d)", sem.addr(), attr.addr(), initial_count, max_count); + if (sem.addr() == NULL) { + sys_semaphore.Error("sys_semaphore_create(): invalid memory access (sem_addr=0x%x)", sem.addr()); + return CELL_EFAULT; + } + + if (attr.addr() == NULL) { + sys_semaphore.Error("sys_semaphore_create(): An invalid argument value is specified (attr_addr=0x%x)", attr.addr()); + return CELL_EINVAL; + } + if (max_count <= 0 || initial_count > max_count || initial_count < 0) { sys_semaphore.Error("sys_semaphore_create(): invalid parameters (initial_count=%d, max_count=%d)", initial_count, max_count); @@ -204,6 +214,11 @@ s32 sys_semaphore_get_value(u32 sem_id, vm::ptr count) { sys_semaphore.Log("sys_semaphore_get_value(sem_id=%d, count_addr=0x%x)", sem_id, count.addr()); + if (count.addr() == NULL) { + sys_semaphore.Error("sys_semaphore_get_value(): invalid memory access (count=0x%x)", count.addr()); + return CELL_EFAULT; + } + Semaphore* sem; if (!Emu.GetIdManager().GetIDData(sem_id, sem)) { From 6906d146bbcd0118aa0686a04eb34cd6f6a57f31 Mon Sep 17 00:00:00 2001 From: Fabian Schaffert Date: Sat, 15 Nov 2014 00:16:17 +0100 Subject: [PATCH 14/27] Adds copy constructor for class InstrBase A copy constructor is necessary for `class InstrBase`, as the implicit copy constructor simply copies the pointer `m_args`. This results in a double delete of the same memory region, causing a segmentation fault when rpcs3 exited.
--- rpcs3/Emu/CPU/CPUDecoder.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/rpcs3/Emu/CPU/CPUDecoder.h b/rpcs3/Emu/CPU/CPUDecoder.h index df28e54467..d1faeea13d 100644 --- a/rpcs3/Emu/CPU/CPUDecoder.h +++ b/rpcs3/Emu/CPU/CPUDecoder.h @@ -333,6 +333,17 @@ public: }); } + InstrBase(const InstrBase &source) + : InstrCaller(source) + , m_name(source.m_name) + , m_opcode(source.m_opcode) + , m_args_count(source.m_args_count) + , m_args(source.m_args_count ? new CodeFieldBase*[source.m_args_count] : nullptr) + { + for(int i = 0; i < source.m_args_count; ++i) + m_args[i] = source.m_args[i]; + } + virtual ~InstrBase() { if (m_args) { From 3af57957d584a082642d7342aaad35eceec693b6 Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Sat, 15 Nov 2014 16:45:02 +0200 Subject: [PATCH 15/27] cellSail fixes and GetRegBySPR split up * Fixed cellSailPlayerAddDescriptor * Fixed cellSailDescriptorSetAutoSelection * Fixed cellSailDescriptorIsAutoSelection * Split GetRegBySPR into ReadSPR and WriteSPR * Added 0x10c for ReadSPR and WriteSPR (Time-based register) --- rpcs3/Emu/Cell/PPUInterpreter.h | 28 +++++++++++++++++++---- rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp | 10 ++++---- rpcs3/Emu/SysCalls/Modules/cellSail.cpp | 17 ++++++++++---- 3 files changed, 40 insertions(+), 15 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index ea6f0d575d..cb6f972e38 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -133,21 +133,39 @@ private: return ctr_ok && cond_ok; } - u64& GetRegBySPR(u32 spr) + u64 ReadSPR(u32 spr) { const u32 n = (spr >> 5) | ((spr & 0x1f) << 5); - switch(n) + switch (n) { case 0x001: return CPU.XER.XER; case 0x008: return CPU.LR; case 0x009: return CPU.CTR; case 0x100: return CPU.USPRG0; + case 0x10C: return CPU.TBL; } - UNK(fmt::Format("GetRegBySPR error: Unknown SPR 0x%x!", n)); + UNK(fmt::Format("ReadSPR error: Unknown SPR 0x%x!", n)); return CPU.XER.XER; } + + void WriteSPR(u32 
spr, u64 value) + { + const u32 n = (spr >> 5) | ((spr & 0x1f) << 5); + + switch (n) + { + case 0x001: CPU.XER.XER = value; return; + case 0x008: CPU.LR = value; return; + case 0x009: CPU.CTR = value; return; + case 0x100: CPU.USPRG0 = value; return; + case 0x10C: CPU.TBL = value; return; + } + + UNK(fmt::Format("WriteSPR error: Unknown SPR 0x%x!", n)); + return; + } void TDI(u32 to, u32 ra, s32 simm16) { @@ -2911,7 +2929,7 @@ private: } void MFSPR(u32 rd, u32 spr) { - CPU.GPR[rd] = GetRegBySPR(spr); + CPU.GPR[rd] = ReadSPR(spr); } void LWAX(u32 rd, u32 ra, u32 rb) { @@ -3062,7 +3080,7 @@ private: } void MTSPR(u32 spr, u32 rs) { - GetRegBySPR(spr) = CPU.GPR[rs]; + WriteSPR(spr, CPU.GPR[rs]); } void DCBI(u32 ra, u32 rb) { diff --git a/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp b/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp index 31eae6d11c..db80ed037f 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp @@ -43,18 +43,18 @@ u32 map_offset_pos = 0; u32 gcmGetLocalMemorySize(u32 sdk_version) { if (sdk_version >= 0x00220000) { - return 0x0F900000; // 249MB + return 0x0F900000; // 249MB } if (sdk_version >= 0x00200000) { - return 0x0F200000; // 242MB + return 0x0F200000; // 242MB } if (sdk_version >= 0x00190000) { - return 0x0EA00000; // 234MB + return 0x0EA00000; // 234MB } if (sdk_version >= 0x00180000) { - return 0x0E800000; // 232MB + return 0x0E800000; // 232MB } - return 0x0E000000; // 224MB + return 0x0E000000; // 224MB } CellGcmOffsetTable offsetTable; diff --git a/rpcs3/Emu/SysCalls/Modules/cellSail.cpp b/rpcs3/Emu/SysCalls/Modules/cellSail.cpp index 323e33a172..3e22257a26 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSail.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSail.cpp @@ -71,17 +71,24 @@ int cellSailDescriptorGetMediaInfo() int cellSailDescriptorSetAutoSelection(vm::ptr pSelf, bool autoSelection) { - cellSail->Todo("cellSailDescriptorSetAutoSelection(pSelf_addr=0x%x, autoSelection=%b)", pSelf.addr(), 
autoSelection); + cellSail->Todo("cellSailDescriptorSetAutoSelection(pSelf_addr=0x%x, autoSelection=%s)", pSelf.addr(), autoSelection ? "true" : "false"); - pSelf->autoSelection = autoSelection; + if (pSelf) { + pSelf->autoSelection = autoSelection; + return autoSelection; + } - return autoSelection; + return CELL_OK; } int cellSailDescriptorIsAutoSelection(vm::ptr pSelf) { cellSail->Warning("cellSailDescriptorIsAutoSelection(pSelf_addr=0x%x)", pSelf.addr()); - return pSelf->autoSelection; + + if (pSelf) + return pSelf->autoSelection; + + return CELL_OK; } int cellSailDescriptorCreateDatabase() @@ -595,7 +602,7 @@ int cellSailPlayerAddDescriptor(vm::ptr pSelf, vm::ptrWarning("cellSailPlayerAddDescriptor(pSelf_addr=0x%x, pDesc_addr=0x%x)", pSelf.addr(), pDesc.addr()); - if (pSelf->descriptors < 3 && pDesc) + if (pSelf && pSelf->descriptors < 3 && pDesc) { pSelf->descriptors++; pSelf->registeredDescriptors[pSelf->descriptors] = pDesc; From 3b31a9233d290fb0e5c6fb63f5d6c59a79504b33 Mon Sep 17 00:00:00 2001 From: raven02 Date: Sat, 15 Nov 2014 22:47:30 +0800 Subject: [PATCH 16/27] Check pSelf->pAddr is not NULL before setup pAddr pointer --- rpcs3/Emu/SysCalls/Modules/cellPamf.cpp | 48 +++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/rpcs3/Emu/SysCalls/Modules/cellPamf.cpp b/rpcs3/Emu/SysCalls/Modules/cellPamf.cpp index 93438d8029..ac359d0e1c 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellPamf.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellPamf.cpp @@ -196,6 +196,10 @@ int cellPamfReaderGetPresentationStartTime(vm::ptr pSelf, vm::pt { cellPamf->Warning("cellPamfReaderGetPresentationStartTime(pSelf=0x%x, pTimeStamp_addr=0x%x)", pSelf.addr(), pTimeStamp.addr()); + if (!pSelf->pAddr) { + return CELL_PAMF_ERROR_INVALID_PAMF; + } + vm::ptr pAddr(pSelf->pAddr); const u32 upper = (u16)pAddr->start_pts_high; pTimeStamp->upper = upper; @@ -207,6 +211,10 @@ int cellPamfReaderGetPresentationEndTime(vm::ptr pSelf, vm::ptr< { 
cellPamf->Warning("cellPamfReaderGetPresentationEndTime(pSelf=0x%x, pTimeStamp_addr=0x%x)", pSelf.addr(), pTimeStamp.addr()); + if (!pSelf->pAddr) { + return CELL_PAMF_ERROR_INVALID_PAMF; + } + vm::ptr pAddr(pSelf->pAddr); const u32 upper = (u16)pAddr->end_pts_high; pTimeStamp->upper = upper; @@ -218,6 +226,10 @@ int cellPamfReaderGetMuxRateBound(vm::ptr pSelf) { cellPamf->Warning("cellPamfReaderGetMuxRateBound(pSelf=0x%x)", pSelf.addr()); + if (!pSelf->pAddr) { + return CELL_PAMF_ERROR_INVALID_PAMF; + } + vm::ptr pAddr(pSelf->pAddr); return pAddr->mux_rate_max; } @@ -226,6 +238,10 @@ int cellPamfReaderGetNumberOfStreams(vm::ptr pSelf) { cellPamf->Warning("cellPamfReaderGetNumberOfStreams(pSelf=0x%x)", pSelf.addr()); + if (!pSelf->pAddr) { + return CELL_PAMF_ERROR_INVALID_PAMF; + } + vm::ptr pAddr(pSelf->pAddr); return pAddr->stream_count; } @@ -233,6 +249,10 @@ int cellPamfReaderGetNumberOfStreams(vm::ptr pSelf) int cellPamfReaderGetNumberOfSpecificStreams(vm::ptr pSelf, u8 streamType) { cellPamf->Warning("cellPamfReaderGetNumberOfSpecificStreams(pSelf=0x%x, streamType=%d)", pSelf.addr(), streamType); + + if (!pSelf->pAddr) { + return CELL_PAMF_ERROR_INVALID_PAMF; + } vm::ptr pAddr(pSelf->pAddr); @@ -265,6 +285,10 @@ int cellPamfReaderSetStreamWithIndex(vm::ptr pSelf, u8 streamInd { cellPamf->Warning("cellPamfReaderSetStreamWithIndex(pSelf=0x%x, streamIndex=%d)", pSelf.addr(), streamIndex); + if (!pSelf->pAddr) { + return CELL_PAMF_ERROR_INVALID_PAMF; + } + vm::ptr pAddr(pSelf->pAddr); if (streamIndex < pAddr->stream_count) @@ -283,6 +307,10 @@ int cellPamfReaderSetStreamWithTypeAndChannel(vm::ptr pSelf, u8 { cellPamf->Warning("cellPamfReaderSetStreamWithTypeAndChannel(pSelf=0x%x, streamType=%d, ch=%d)", pSelf.addr(), streamType, ch); + if (!pSelf->pAddr) { + return CELL_PAMF_ERROR_INVALID_PAMF; + } + vm::ptr pAddr(pSelf->pAddr); if (streamType > 5) @@ -311,6 +339,10 @@ int cellPamfReaderSetStreamWithTypeAndIndex(vm::ptr pSelf, u8 st { 
cellPamf->Warning("cellPamfReaderSetStreamWithTypeAndIndex(pSelf=0x%x, streamType=%d, streamIndex=%d)", pSelf.addr(), streamType, streamIndex); + if (!pSelf->pAddr) { + return CELL_PAMF_ERROR_INVALID_PAMF; + } + vm::ptr pAddr(pSelf->pAddr); u32 found = 0; @@ -390,6 +422,10 @@ int cellPamfReaderGetStreamInfo(vm::ptr pSelf, u32 pInfo_addr, u { cellPamf->Warning("cellPamfReaderGetStreamInfo(pSelf=0x%x, stream=%d, pInfo_addr=0x%x, size=%d)", pSelf.addr(), pSelf->stream, pInfo_addr, size); + if (!pSelf->pAddr) { + return CELL_PAMF_ERROR_INVALID_PAMF; + } + vm::ptr pAddr(pSelf->pAddr); memset(vm::get_ptr(pInfo_addr), 0, size); @@ -497,6 +533,10 @@ int cellPamfReaderGetNumberOfEp(vm::ptr pSelf) { cellPamf->Warning("cellPamfReaderGetNumberOfEp(pSelf=0x%x, stream=%d)", pSelf.addr(), pSelf->stream); + if (!pSelf->pAddr) { + return CELL_PAMF_ERROR_INVALID_PAMF; + } + vm::ptr pAddr(pSelf->pAddr); return pAddr->stream_headers[pSelf->stream].ep_num; } @@ -505,6 +545,10 @@ int cellPamfReaderGetEpIteratorWithIndex(vm::ptr pSelf, u32 epIn { cellPamf->Todo("cellPamfReaderGetEpIteratorWithIndex(pSelf=0x%x, stream=%d, epIndex=%d, pIt_addr=0x%x)", pSelf.addr(), pSelf->stream, epIndex, pIt.addr()); + if (!pSelf->pAddr) { + return CELL_PAMF_ERROR_INVALID_PAMF; + } + vm::ptr pAddr(pSelf->pAddr); //TODO: return CELL_OK; @@ -514,6 +558,10 @@ int cellPamfReaderGetEpIteratorWithTimeStamp(vm::ptr pSelf, vm:: { cellPamf->Todo("cellPamfReaderGetEpIteratorWithTimeStamp(pSelf=0x%x, pTimeStamp_addr=0x%x, pIt_addr=0x%x)", pSelf.addr(), pTimeStamp.addr(), pIt.addr()); + if (!pSelf->pAddr) { + return CELL_PAMF_ERROR_INVALID_PAMF; + } + vm::ptr pAddr(pSelf->pAddr); //TODO: From f507724b5cc28cccd61ae25711e8b3e007cf525c Mon Sep 17 00:00:00 2001 From: Fabian Schaffert Date: Sat, 15 Nov 2014 21:30:01 +0100 Subject: [PATCH 17/27] Fixes build in Debug mode on linux The changes introduced in commit 80294e1 make the mcdisassembler component of LLVM necessary in debug mode to successfully link rpcs3.
--- rpcs3/CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index cf55a5ca0b..9245ac9125 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -81,7 +81,11 @@ ${LLVM_INCLUDE_DIRS} add_definitions(${LLVM_DEFINITIONS}) add_definitions(-DLLVM_AVAILABLE) -llvm_map_components_to_libnames(LLVM_LIBS jit vectorize x86codegen x86disassembler) +if (CMAKE_BUILD_TYPE STREQUAL "Release") + llvm_map_components_to_libnames(LLVM_LIBS jit vectorize x86codegen x86disassembler) +else() + llvm_map_components_to_libnames(LLVM_LIBS jit vectorize x86codegen x86disassembler mcdisassembler) +endif() link_directories("${RPCS3_SRC_DIR}/../ffmpeg/${PLATFORM_ARCH}/lib") From 05f91dc293191ef3c18091ad94dc021d3313a38d Mon Sep 17 00:00:00 2001 From: Zangetsu Date: Sun, 16 Nov 2014 05:40:02 +0100 Subject: [PATCH 18/27] Update SLN Delete version express and Update Number for VS2013 Update 4 --- rpcs3.sln | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3.sln b/rpcs3.sln index 979445e32a..e594502b11 100644 --- a/rpcs3.sln +++ b/rpcs3.sln @@ -1,6 +1,6 @@ Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Express 2013 for Windows Desktop -VisualStudioVersion = 12.0.30723.0 +# Visual Studio 2013 +VisualStudioVersion = 12.0.31101.0 MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "rpcs3", "rpcs3\rpcs3.vcxproj", "{70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}" ProjectSection(ProjectDependencies) = postProject From 25ba18e8a76ba12472a9baba4758e783940c3202 Mon Sep 17 00:00:00 2001 From: raven02 Date: Sun, 16 Nov 2014 19:05:41 +0800 Subject: [PATCH 19/27] Check address is not null for cellGcmAddressToOffset --- rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp b/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp index db80ed037f..f51efc2615 
100644 --- a/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp @@ -803,7 +803,8 @@ s32 cellGcmAddressToOffset(u64 address, vm::ptr> offset) cellGcmSys->Log("cellGcmAddressToOffset(address=0x%x,offset_addr=0x%x)", address, offset.addr()); // Address not on main memory or local memory - if (address >= 0xD0000000) { + if (!address || address >= 0xD0000000) { + cellGcmSys->Error("cellGcmAddressToOffset(address=0x%x,offset_addr=0x%x)", address, offset.addr()); return CELL_GCM_ERROR_FAILURE; } From 00e0e857934b52d4078fb0d9851b1b861bee3f1e Mon Sep 17 00:00:00 2001 From: Fabian Schaffert Date: Sun, 16 Nov 2014 14:43:58 +0100 Subject: [PATCH 20/27] Fixes return value of sys_semaphore_create() In case of attr.addr() being NULL, CELL_EFAULT must be returned and not CELL_EINVAL, according to the corresponding ps3autotest. --- rpcs3/Emu/SysCalls/lv2/sys_semaphore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/SysCalls/lv2/sys_semaphore.cpp b/rpcs3/Emu/SysCalls/lv2/sys_semaphore.cpp index e5fd7463b5..d194a0b563 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_semaphore.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_semaphore.cpp @@ -33,7 +33,7 @@ s32 sys_semaphore_create(vm::ptr sem, vm::ptr attr if (attr.addr() == NULL) { sys_semaphore.Error("sys_semaphore_create(): An invalid argument value is specified (attr_addr=0x%x)", attr.addr()); - return CELL_EINVAL; + return CELL_EFAULT; } if (max_count <= 0 || initial_count > max_count || initial_count < 0) From e9ab9f51fc38596890df07411ebb357e209d174c Mon Sep 17 00:00:00 2001 From: Fabian Schaffert Date: Sun, 16 Nov 2014 20:48:22 +0100 Subject: [PATCH 21/27] Fixes segfaults for sys_event_flag_(create/get) Neither sys_event_flag_create() nor sys_event_flag_get() checked for NULL pointers in their arguments, which caused the corresponding test from ps3autotests to segfault.
--- rpcs3/Emu/SysCalls/lv2/sys_event_flag.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/rpcs3/Emu/SysCalls/lv2/sys_event_flag.cpp b/rpcs3/Emu/SysCalls/lv2/sys_event_flag.cpp index 244d19cec4..336c88c4bb 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_event_flag.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_event_flag.cpp @@ -42,6 +42,18 @@ s32 sys_event_flag_create(vm::ptr eflag_id, vm::ptr at sys_event_flag.Warning("sys_event_flag_create(eflag_id_addr=0x%x, attr_addr=0x%x, init=0x%llx)", eflag_id.addr(), attr.addr(), init); + if (eflag_id.addr() == NULL) + { + sys_event_flag.Error("sys_event_flag_create(): invalid memory access (eflag_id_addr=0x%x)", eflag_id.addr()); + return CELL_EFAULT; + } + + if (attr.addr() == NULL) + { + sys_event_flag.Error("sys_event_flag_create(): invalid memory access (attr_addr=0x%x)", attr.addr()); + return CELL_EFAULT; + } + switch (attr->protocol.ToBE()) { case se32(SYS_SYNC_PRIORITY): break; @@ -358,6 +370,12 @@ s32 sys_event_flag_get(u32 eflag_id, vm::ptr flags) { sys_event_flag.Log("sys_event_flag_get(eflag_id=%d, flags_addr=0x%x)", eflag_id, flags.addr()); + if (flags.addr() == NULL) + { + sys_event_flag.Error("sys_event_flag_create(): invalid memory access (flags_addr=0x%x)", flags.addr()); + return CELL_EFAULT; + } + EventFlag* ef; if (!sys_event_flag.CheckId(eflag_id, ef)) return CELL_ESRCH; From cd347fa1103f1cb6ae3886b60aafc0ca92a584c6 Mon Sep 17 00:00:00 2001 From: raven02 Date: Tue, 18 Nov 2014 14:44:37 +0800 Subject: [PATCH 22/27] Minor fix --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 3310425c34..2af22b20ec 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -2037,8 +2037,12 @@ void GLGSRender::Flip() glReadPixels(0, 0, RSXThread::m_width, RSXThread::m_height, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, 0); checkForGlError("Flip(): glReadPixels(GL_BGRA, 
GL_UNSIGNED_INT_8_8_8_8)"); GLubyte *packed = (GLubyte *)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); - memcpy(pixels.data(), packed, RSXThread::m_width * RSXThread::m_height * 4); - glUnmapBuffer(GL_PIXEL_PACK_BUFFER); + if (packed) + { + memcpy(pixels.data(), packed, RSXThread::m_width * RSXThread::m_height * 4); + glUnmapBuffer(GL_PIXEL_PACK_BUFFER); + checkForGlError("Flip(): glUnmapBuffer"); + } glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); src_buffer = pixels.data(); From f99353f649584c272c4cc5acf032b5fd9ef21915 Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Tue, 18 Nov 2014 18:03:49 +0200 Subject: [PATCH 23/27] Fix for TB SPR and disable writing to TB SPR Reading the time-based SPR now uses get_time(), like MFTB, instead of returning CPU.TBL. --- rpcs3/Emu/Cell/PPUInterpreter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 5e721ccf5f..104418d7bd 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -147,7 +147,7 @@ private: case 0x008: return CPU.LR; case 0x009: return CPU.CTR; case 0x100: return CPU.USPRG0; - case 0x10C: return CPU.TBL; + case 0x10C: return get_time(); } UNK(fmt::Format("ReadSPR error: Unknown SPR 0x%x!", n)); @@ -164,7 +164,7 @@ private: case 0x008: CPU.LR = value; return; case 0x009: CPU.CTR = value; return; case 0x100: CPU.USPRG0 = value; return; - case 0x10C: CPU.TBL = value; return; + case 0x10C: UNK("WriteSPR: Write to time-based SPR. Report this to a developer!"); return; } UNK(fmt::Format("WriteSPR error: Unknown SPR 0x%x!", n)); From 598d929abaa47281322b14eb840cd11c959e9692 Mon Sep 17 00:00:00 2001 From: DHrpcs3 Date: Wed, 19 Nov 2014 16:16:30 +0200 Subject: [PATCH 24/27] Implemented LLE Modules Manager (draft) Improved loader core. Implemented *_thread wrappers. Minor fixes. Temporarily disabled ELF Compiler & DisAsm frame.
--- Utilities/BEType.h | 114 +- rpcs3/Crypto/unself.cpp | 95 ++ rpcs3/Crypto/unself.h | 289 ++++- rpcs3/Emu/ARMv7/ARMv7Thread.cpp | 13 + rpcs3/Emu/ARMv7/ARMv7Thread.h | 45 + rpcs3/Emu/CPU/CPUDecoder.h | 2 +- rpcs3/Emu/CPU/CPUThread.h | 39 + rpcs3/Emu/Cell/PPCDecoder.cpp | 2 +- rpcs3/Emu/Cell/PPUInstrTable.h | 1210 +++++++++--------- rpcs3/Emu/Cell/PPUInterpreter.h | 38 +- rpcs3/Emu/Cell/PPUProgramCompiler.cpp | 71 +- rpcs3/Emu/Cell/PPUProgramCompiler.h | 3 +- rpcs3/Emu/Cell/PPUThread.cpp | 26 +- rpcs3/Emu/Cell/PPUThread.h | 56 +- rpcs3/Emu/Cell/SPUThread.cpp | 14 +- rpcs3/Emu/Cell/SPUThread.h | 46 + rpcs3/Emu/Memory/Memory.cpp | 47 +- rpcs3/Emu/Memory/Memory.h | 11 +- rpcs3/Emu/Memory/vm.cpp | 136 +- rpcs3/Emu/Memory/vm.h | 88 +- rpcs3/Emu/Memory/vm_ptr.h | 152 ++- rpcs3/Emu/SysCalls/ModuleManager.h | 18 + rpcs3/Emu/SysCalls/Modules/cellAvconfExt.cpp | 7 +- rpcs3/Emu/SysCalls/Modules/cellPamf.cpp | 72 +- rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp | 9 +- rpcs3/Emu/SysCalls/Modules/sys_net.cpp | 2 +- rpcs3/Emu/SysCalls/lv2/sys_mmapper.cpp | 4 +- rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp | 10 +- rpcs3/Emu/SysCalls/lv2/sys_prx.cpp | 4 +- rpcs3/Emu/SysCalls/lv2/sys_prx.h | 65 +- rpcs3/Emu/SysCalls/lv2/sys_spu.cpp | 18 +- rpcs3/Emu/SysCalls/lv2/sys_spu.h | 4 + rpcs3/Emu/System.cpp | 151 +-- rpcs3/Emu/System.h | 24 +- rpcs3/Gui/CompilerELF.cpp | 2 +- rpcs3/Gui/DisAsmFrame.cpp | 2 + rpcs3/Gui/DisAsmFrame.h | 6 +- rpcs3/Gui/LLEModulesManager.cpp | 65 + rpcs3/Gui/LLEModulesManager.h | 13 + rpcs3/Gui/MainFrame.cpp | 10 + rpcs3/Gui/MainFrame.h | 1 + rpcs3/Loader/ELF.cpp | 63 - rpcs3/Loader/ELF.h | 53 - rpcs3/Loader/ELF32.cpp | 678 ++-------- rpcs3/Loader/ELF32.h | 260 ++-- rpcs3/Loader/ELF64.cpp | 956 +++++++------- rpcs3/Loader/ELF64.h | 217 ++-- rpcs3/Loader/Loader.cpp | 130 +- rpcs3/Loader/Loader.h | 124 +- rpcs3/Loader/SELF.cpp | 99 -- rpcs3/Loader/SELF.h | 53 - rpcs3/Loader/TROPUSR.cpp | 2 +- rpcs3/emucore.vcxproj | 4 - rpcs3/emucore.vcxproj.filters | 12 - 
rpcs3/rpcs3.vcxproj | 2 + rpcs3/rpcs3.vcxproj.filters | 6 + rpcs3/stdafx.h | 5 +- 57 files changed, 2844 insertions(+), 2804 deletions(-) create mode 100644 rpcs3/Gui/LLEModulesManager.cpp create mode 100644 rpcs3/Gui/LLEModulesManager.h delete mode 100644 rpcs3/Loader/ELF.cpp delete mode 100644 rpcs3/Loader/ELF.h delete mode 100644 rpcs3/Loader/SELF.cpp delete mode 100644 rpcs3/Loader/SELF.h diff --git a/Utilities/BEType.h b/Utilities/BEType.h index 1bbc0a8114..cfe3dc5b0f 100644 --- a/Utilities/BEType.h +++ b/Utilities/BEType.h @@ -451,7 +451,13 @@ template class be_t { static_assert(sizeof(T2) == 1 || sizeof(T2) == 2 || sizeof(T2) == 4 || sizeof(T2) == 8, "Bad be_t type"); - T m_data; + +public: + typedef typename std::remove_cv::type type; + static const bool is_le_machine = true; + +private: + type m_data; template struct _convert @@ -482,55 +488,77 @@ class be_t return (be_t&)res; } }; + public: - typedef T type; - const T& ToBE() const { return m_data; } - T ToLE() const + type ToLE() const { return se_t::func(m_data); } - void FromBE(const T& value) + void FromBE(const type& value) { m_data = value; } - void FromLE(const T& value) + void FromLE(const type& value) { m_data = se_t::func(value); } - static be_t make(const T value) + static be_t MakeFromLE(const type value) { - T data = se_t::func(value); + type data = se_t::func(value); return (be_t&)data; } - //template - operator const T() const + static be_t MakeFromBE(const type value) { - return ToLE(); + return (be_t&)value; + } + + //make be_t from current machine byte ordering + static be_t make(const type value) + { + return is_le_machine ? MakeFromLE(value) : MakeFromBE(value); + } + + //get value in current machine byte ordering + __forceinline type value() const + { + return is_le_machine ? 
ToLE() : ToBE(); + } + + be_t() = default; + be_t(const be_t& value) = default; + + be_t(type value) + { + m_data = se_t::func(value); } be_t& operator = (const be_t& value) = default; - be_t& operator = (T value) + be_t& operator = (type value) { - m_data = se_t::func(value); + m_data = se_t::func(value); return *this; } + operator type() const + { + return value(); + } + template operator const be_t() const { - return be_t::make(ToLE()); - //return _convert sizeof(T)) ? 1 : (sizeof(T1) < sizeof(T) ? 2 : 0))>::func(m_data); + return _convert sizeof(T)) ? 1 : (sizeof(T1) < sizeof(T) ? 2 : 0))>::func(m_data); } template be_t& operator += (T1 right) { return *this = T(*this) + right; } @@ -577,62 +605,6 @@ public: be_t& operator-- () { *this -= 1; return *this; } }; -template -class be_t -{ - static_assert(sizeof(T2) == 1 || sizeof(T2) == 2 || sizeof(T2) == 4 || sizeof(T2) == 8, "Bad be_t type"); - const T m_data; - -public: - typedef const T type; - - const T& ToBE() const - { - return m_data; - } - - const T ToLE() const - { - return se_t::func(m_data); - } - - static be_t make(const T value) - { - const T data = se_t::func(value); - return (be_t&)data; - } - - //template - operator const T() const - { - return ToLE(); - } - - template - operator const be_t() const - { - return be_t::make(ToLE()); - } - - template be_t operator & (const be_t& right) const { const T res = ToBE() & right.ToBE(); return (be_t&)res; } - template be_t operator | (const be_t& right) const { const T res = ToBE() | right.ToBE(); return (be_t&)res; } - template be_t operator ^ (const be_t& right) const { const T res = ToBE() ^ right.ToBE(); return (be_t&)res; } - - template bool operator == (T1 right) const { return (T1)ToLE() == right; } - template bool operator != (T1 right) const { return !(*this == right); } - template bool operator > (T1 right) const { return (T1)ToLE() > right; } - template bool operator < (T1 right) const { return (T1)ToLE() < right; } - template bool operator >= (T1 
right) const { return (T1)ToLE() >= right; } - template bool operator <= (T1 right) const { return (T1)ToLE() <= right; } - - template bool operator == (const be_t& right) const { return ToBE() == right.ToBE(); } - template bool operator != (const be_t& right) const { return !(*this == right); } - template bool operator > (const be_t& right) const { return (T1)ToLE() > right.ToLE(); } - template bool operator < (const be_t& right) const { return (T1)ToLE() < right.ToLE(); } - template bool operator >= (const be_t& right) const { return (T1)ToLE() >= right.ToLE(); } - template bool operator <= (const be_t& right) const { return (T1)ToLE() <= right.ToLE(); } -}; - template struct is_be_t : public std::integral_constant {}; diff --git a/rpcs3/Crypto/unself.cpp b/rpcs3/Crypto/unself.cpp index 67ef26a4b1..28b464c100 100644 --- a/rpcs3/Crypto/unself.cpp +++ b/rpcs3/Crypto/unself.cpp @@ -10,6 +10,101 @@ #include #include + +void WriteEhdr(rFile& f, Elf64_Ehdr& ehdr) +{ +Write32(f, ehdr.e_magic); +Write8(f, ehdr.e_class); +Write8(f, ehdr.e_data); +Write8(f, ehdr.e_curver); +Write8(f, ehdr.e_os_abi); +Write64(f, ehdr.e_abi_ver); +Write16(f, ehdr.e_type); +Write16(f, ehdr.e_machine); +Write32(f, ehdr.e_version); +Write64(f, ehdr.e_entry); +Write64(f, ehdr.e_phoff); +Write64(f, ehdr.e_shoff); +Write32(f, ehdr.e_flags); +Write16(f, ehdr.e_ehsize); +Write16(f, ehdr.e_phentsize); +Write16(f, ehdr.e_phnum); +Write16(f, ehdr.e_shentsize); +Write16(f, ehdr.e_shnum); +Write16(f, ehdr.e_shstrndx); +} +void WritePhdr(rFile& f, Elf64_Phdr& phdr) +{ +Write32(f, phdr.p_type); +Write32(f, phdr.p_flags); +Write64(f, phdr.p_offset); +Write64(f, phdr.p_vaddr); +Write64(f, phdr.p_paddr); +Write64(f, phdr.p_filesz); +Write64(f, phdr.p_memsz); +Write64(f, phdr.p_align); +} +void WriteShdr(rFile& f, Elf64_Shdr& shdr) +{ +Write32(f, shdr.sh_name); +Write32(f, shdr.sh_type); +Write64(f, shdr.sh_flags); +Write64(f, shdr.sh_addr); +Write64(f, shdr.sh_offset); +Write64(f, shdr.sh_size); +Write32(f, 
shdr.sh_link); +Write32(f, shdr.sh_info); +Write64(f, shdr.sh_addralign); +Write64(f, shdr.sh_entsize); +} +void WriteEhdr(rFile& f, Elf32_Ehdr& ehdr) +{ + Write32(f, ehdr.e_magic); + Write8(f, ehdr.e_class); + Write8(f, ehdr.e_data); + Write8(f, ehdr.e_curver); + Write8(f, ehdr.e_os_abi); + Write64(f, ehdr.e_abi_ver); + Write16(f, ehdr.e_type); + Write16(f, ehdr.e_machine); + Write32(f, ehdr.e_version); + Write32(f, ehdr.e_entry); + Write32(f, ehdr.e_phoff); + Write32(f, ehdr.e_shoff); + Write32(f, ehdr.e_flags); + Write16(f, ehdr.e_ehsize); + Write16(f, ehdr.e_phentsize); + Write16(f, ehdr.e_phnum); + Write16(f, ehdr.e_shentsize); + Write16(f, ehdr.e_shnum); + Write16(f, ehdr.e_shstrndx); +} +void WritePhdr(rFile& f, Elf32_Phdr& phdr) +{ + Write32(f, phdr.p_type); + Write32(f, phdr.p_offset); + Write32(f, phdr.p_vaddr); + Write32(f, phdr.p_paddr); + Write32(f, phdr.p_filesz); + Write32(f, phdr.p_memsz); + Write32(f, phdr.p_flags); + Write32(f, phdr.p_align); +} +void WriteShdr(rFile& f, Elf32_Shdr& shdr) +{ + Write32(f, shdr.sh_name); + Write32(f, shdr.sh_type); + Write32(f, shdr.sh_flags); + Write32(f, shdr.sh_addr); + Write32(f, shdr.sh_offset); + Write32(f, shdr.sh_size); + Write32(f, shdr.sh_link); + Write32(f, shdr.sh_info); + Write32(f, shdr.sh_addralign); + Write32(f, shdr.sh_entsize); +} + + void AppInfo::Load(vfsStream& f) { authid = Read64(f); diff --git a/rpcs3/Crypto/unself.h b/rpcs3/Crypto/unself.h index 53b34bee30..787fc00c7b 100644 --- a/rpcs3/Crypto/unself.h +++ b/rpcs3/Crypto/unself.h @@ -1,6 +1,5 @@ #pragma once -#include "Loader/SELF.h" #include "Loader/ELF64.h" #include "Loader/ELF32.h" #include "key_vault.h" @@ -142,7 +141,8 @@ struct MetadataSectionHeader void Show(); }; -struct SectionHash { +struct SectionHash +{ u8 sha1[20]; u8 padding[12]; u8 hmac_key[64]; @@ -183,6 +183,291 @@ struct SelfSection void Load(vfsStream& f); }; +struct Elf32_Ehdr +{ + u32 e_magic; + u8 e_class; + u8 e_data; + u8 e_curver; + u8 e_os_abi; + u64 e_abi_ver; + 
u16 e_type; + u16 e_machine; + u32 e_version; + u32 e_entry; + u32 e_phoff; + u32 e_shoff; + u32 e_flags; + u16 e_ehsize; + u16 e_phentsize; + u16 e_phnum; + u16 e_shentsize; + u16 e_shnum; + u16 e_shstrndx; + void Show() {} + bool IsLittleEndian() const + { + return e_data == 1; + } + + void Load(vfsStream& f) + { + e_magic = Read32(f); + e_class = Read8(f); + e_data = Read8(f); + e_curver = Read8(f); + e_os_abi = Read8(f); + + if (IsLittleEndian()) + { + e_abi_ver = Read64LE(f); + e_type = Read16LE(f); + e_machine = Read16LE(f); + e_version = Read32LE(f); + e_entry = Read32LE(f); + e_phoff = Read32LE(f); + e_shoff = Read32LE(f); + e_flags = Read32LE(f); + e_ehsize = Read16LE(f); + e_phentsize = Read16LE(f); + e_phnum = Read16LE(f); + e_shentsize = Read16LE(f); + e_shnum = Read16LE(f); + e_shstrndx = Read16LE(f); + } + else + { + e_abi_ver = Read64(f); + e_type = Read16(f); + e_machine = Read16(f); + e_version = Read32(f); + e_entry = Read32(f); + e_phoff = Read32(f); + e_shoff = Read32(f); + e_flags = Read32(f); + e_ehsize = Read16(f); + e_phentsize = Read16(f); + e_phnum = Read16(f); + e_shentsize = Read16(f); + e_shnum = Read16(f); + e_shstrndx = Read16(f); + } + } + bool CheckMagic() const { return e_magic == 0x7F454C46; } + u32 GetEntry() const { return e_entry; } +}; + +struct Elf32_Shdr +{ + u32 sh_name; + u32 sh_type; + u32 sh_flags; + u32 sh_addr; + u32 sh_offset; + u32 sh_size; + u32 sh_link; + u32 sh_info; + u32 sh_addralign; + u32 sh_entsize; + void Load(vfsStream& f) + { + sh_name = Read32(f); + sh_type = Read32(f); + sh_flags = Read32(f); + sh_addr = Read32(f); + sh_offset = Read32(f); + sh_size = Read32(f); + sh_link = Read32(f); + sh_info = Read32(f); + sh_addralign = Read32(f); + sh_entsize = Read32(f); + } + void LoadLE(vfsStream& f) + { + f.Read(this, sizeof(*this)); + } + void Show() {} +}; +struct Elf32_Phdr +{ + u32 p_type; + u32 p_offset; + u32 p_vaddr; + u32 p_paddr; + u32 p_filesz; + u32 p_memsz; + u32 p_flags; + u32 p_align; + void 
Load(vfsStream& f) + { + p_type = Read32(f); + p_offset = Read32(f); + p_vaddr = Read32(f); + p_paddr = Read32(f); + p_filesz = Read32(f); + p_memsz = Read32(f); + p_flags = Read32(f); + p_align = Read32(f); + } + void LoadLE(vfsStream& f) + { + f.Read(this, sizeof(*this)); + } + void Show() {} +}; + +struct Elf64_Ehdr +{ + u32 e_magic; + u8 e_class; + u8 e_data; + u8 e_curver; + u8 e_os_abi; + u64 e_abi_ver; + u16 e_type; + u16 e_machine; + u32 e_version; + u64 e_entry; + u64 e_phoff; + u64 e_shoff; + u32 e_flags; + u16 e_ehsize; + u16 e_phentsize; + u16 e_phnum; + u16 e_shentsize; + u16 e_shnum; + u16 e_shstrndx; + void Load(vfsStream& f) + { + e_magic = Read32(f); + e_class = Read8(f); + e_data = Read8(f); + e_curver = Read8(f); + e_os_abi = Read8(f); + e_abi_ver = Read64(f); + e_type = Read16(f); + e_machine = Read16(f); + e_version = Read32(f); + e_entry = Read64(f); + e_phoff = Read64(f); + e_shoff = Read64(f); + e_flags = Read32(f); + e_ehsize = Read16(f); + e_phentsize = Read16(f); + e_phnum = Read16(f); + e_shentsize = Read16(f); + e_shnum = Read16(f); + e_shstrndx = Read16(f); + } + void Show() {} + bool CheckMagic() const { return e_magic == 0x7F454C46; } + u64 GetEntry() const { return e_entry; } +}; + +struct Elf64_Shdr +{ + u32 sh_name; + u32 sh_type; + u64 sh_flags; + u64 sh_addr; + u64 sh_offset; + u64 sh_size; + u32 sh_link; + u32 sh_info; + u64 sh_addralign; + u64 sh_entsize; + void Load(vfsStream& f) + { + sh_name = Read32(f); + sh_type = Read32(f); + sh_flags = Read64(f); + sh_addr = Read64(f); + sh_offset = Read64(f); + sh_size = Read64(f); + sh_link = Read32(f); + sh_info = Read32(f); + sh_addralign = Read64(f); + sh_entsize = Read64(f); + } + void Show(){} +}; + +struct Elf64_Phdr +{ + u32 p_type; + u32 p_flags; + u64 p_offset; + u64 p_vaddr; + u64 p_paddr; + u64 p_filesz; + u64 p_memsz; + u64 p_align; + void Load(vfsStream& f) + { + p_type = Read32(f); + p_flags = Read32(f); + p_offset = Read64(f); + p_vaddr = Read64(f); + p_paddr = 
Read64(f); + p_filesz = Read64(f); + p_memsz = Read64(f); + p_align = Read64(f); + } + void Show(){} +}; + +struct SceHeader +{ + u32 se_magic; + u32 se_hver; + u16 se_flags; + u16 se_type; + u32 se_meta; + u64 se_hsize; + u64 se_esize; + void Load(vfsStream& f) + { + se_magic = Read32(f); + se_hver = Read32(f); + se_flags = Read16(f); + se_type = Read16(f); + se_meta = Read32(f); + se_hsize = Read64(f); + se_esize = Read64(f); + } + void Show(){} + bool CheckMagic() const { return se_magic == 0x53434500; } +}; + +struct SelfHeader +{ + u64 se_htype; + u64 se_appinfooff; + u64 se_elfoff; + u64 se_phdroff; + u64 se_shdroff; + u64 se_secinfoff; + u64 se_sceveroff; + u64 se_controloff; + u64 se_controlsize; + u64 pad; + void Load(vfsStream& f) + { + se_htype = Read64(f); + se_appinfooff = Read64(f); + se_elfoff = Read64(f); + se_phdroff = Read64(f); + se_shdroff = Read64(f); + se_secinfoff = Read64(f); + se_sceveroff = Read64(f); + se_controloff = Read64(f); + se_controlsize = Read64(f); + pad = Read64(f); + } + void Show(){} +}; + + class SELFDecrypter { // Main SELF file stream. diff --git a/rpcs3/Emu/ARMv7/ARMv7Thread.cpp b/rpcs3/Emu/ARMv7/ARMv7Thread.cpp index 732c2ad7fa..8c57c78485 100644 --- a/rpcs3/Emu/ARMv7/ARMv7Thread.cpp +++ b/rpcs3/Emu/ARMv7/ARMv7Thread.cpp @@ -3,6 +3,7 @@ #include "Utilities/Log.h" #include "Emu/Memory/Memory.h" #include "Emu/System.h" +#include "Emu/CPU/CPUThreadManager.h" #include "ARMv7Thread.h" #include "ARMv7Decoder.h" @@ -100,3 +101,15 @@ void ARMv7Thread::DoStop() void ARMv7Thread::DoCode() { } + +arm7_thread::arm7_thread(u32 entry, const std::string& name, u32 stack_size, u32 prio) +{ + thread = &Emu.GetCPU().AddThread(CPU_THREAD_ARMv7); + + thread->SetName(name); + thread->SetEntry(entry); + thread->SetStackSize(stack_size ? stack_size : Emu.GetInfo().GetProcParam().primary_stacksize); + thread->SetPrio(prio ? 
prio : Emu.GetInfo().GetProcParam().primary_prio); + + argc = 0; +} \ No newline at end of file diff --git a/rpcs3/Emu/ARMv7/ARMv7Thread.h b/rpcs3/Emu/ARMv7/ARMv7Thread.h index d80d5c9669..c657042e10 100644 --- a/rpcs3/Emu/ARMv7/ARMv7Thread.h +++ b/rpcs3/Emu/ARMv7/ARMv7Thread.h @@ -138,3 +138,48 @@ protected: virtual void DoCode(); }; +class arm7_thread : cpu_thread +{ + static const u32 stack_align = 0x10; + vm::ptr argv; + u32 argc; + vm::ptr envp; + +public: + arm7_thread(u32 entry, const std::string& name = "", u32 stack_size = 0, u32 prio = 0); + + cpu_thread& args(std::initializer_list values) override + { + if (!values.size()) + return *this; + + assert(argc == 0); + + envp.set(vm::alloc((u32)sizeof(envp), stack_align, vm::main)); + *envp = 0; + argv.set(vm::alloc(u32(sizeof(argv)* values.size()), stack_align, vm::main)); + + for (auto &arg : values) + { + u32 arg_size = align(u32(arg.size() + 1), stack_align); + u32 arg_addr = vm::alloc(arg_size, stack_align, vm::main); + + std::strcpy(vm::get_ptr(arg_addr), arg.c_str()); + + argv[argc++] = arg_addr; + } + + return *this; + } + + cpu_thread& run() override + { + thread->Run(); + + static_cast(thread)->GPR[3] = argc; + static_cast(thread)->GPR[4] = argv.addr(); + static_cast(thread)->GPR[5] = envp.addr(); + + return *this; + } +}; \ No newline at end of file diff --git a/rpcs3/Emu/CPU/CPUDecoder.h b/rpcs3/Emu/CPU/CPUDecoder.h index d1faeea13d..beb0027055 100644 --- a/rpcs3/Emu/CPU/CPUDecoder.h +++ b/rpcs3/Emu/CPU/CPUDecoder.h @@ -340,7 +340,7 @@ public: , m_args_count(source.m_args_count) , m_args(source.m_args_count ? 
new CodeFieldBase*[source.m_args_count] : nullptr) { - for(int i = 0; i < source.m_args_count; ++i) + for(uint i = 0; i < source.m_args_count; ++i) m_args[i] = source.m_args[i]; } diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h index 35d48dd990..d5b2a94eb1 100644 --- a/rpcs3/Emu/CPU/CPUThread.h +++ b/rpcs3/Emu/CPU/CPUThread.h @@ -256,3 +256,42 @@ protected: }; CPUThread* GetCurrentCPUThread(); + +class cpu_thread +{ +protected: + CPUThread* thread; + +public: + u32 get_entry() const + { + return thread->entry; + } + + virtual cpu_thread& args(std::initializer_list values) = 0; + + virtual cpu_thread& run() = 0; + + u64 join() + { + if (!joinable()) + throw "thread must be joinable for join"; + + thread->SetJoinable(false); + + while (thread->IsRunning()) + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + + return thread->GetExitStatus(); + } + + bool joinable() const + { + return thread->IsJoinable(); + } + + u32 get_id() const + { + thread->GetId(); + } +}; \ No newline at end of file diff --git a/rpcs3/Emu/Cell/PPCDecoder.cpp b/rpcs3/Emu/Cell/PPCDecoder.cpp index 52e28b9e91..de2b55fdf9 100644 --- a/rpcs3/Emu/Cell/PPCDecoder.cpp +++ b/rpcs3/Emu/Cell/PPCDecoder.cpp @@ -8,4 +8,4 @@ u8 PPCDecoder::DecodeMemory(const u32 address) Decode(instr); return sizeof(u32); -} +} \ No newline at end of file diff --git a/rpcs3/Emu/Cell/PPUInstrTable.h b/rpcs3/Emu/Cell/PPUInstrTable.h index 77f742d467..250311321e 100644 --- a/rpcs3/Emu/Cell/PPUInstrTable.h +++ b/rpcs3/Emu/Cell/PPUInstrTable.h @@ -5,628 +5,656 @@ namespace PPU_instr { - //This field is used in rotate instructions to specify the first 1 bit of a 64-bit mask - static DoubleCodeField<21, 25, 26, 26, 5> mb; + namespace fields + { + //This field is used in rotate instructions to specify the first 1 bit of a 64-bit mask + static DoubleCodeField<21, 25, 26, 26, 5> mb; - //This field is used in rotate instructions to specify the last 1 bit of a 64-bit mask - static DoubleCodeField<21, 25, 
26, 26, 5> me; + //This field is used in rotate instructions to specify the last 1 bit of a 64-bit mask + static DoubleCodeField<21, 25, 26, 26, 5> me; - //This field is used to specify a shift amount - static DoubleCodeField<16, 20, 30, 30, 5> sh; + //This field is used to specify a shift amount + static DoubleCodeField<16, 20, 30, 30, 5> sh; - //This field is used to specify a special-purpose register for the mtspr and mfspr instructions - static CodeField<11, 20> SPR; + //This field is used to specify a special-purpose register for the mtspr and mfspr instructions + static CodeField<11, 20> SPR; - // - static CodeField<6, 10> VS(FIELD_R_VPR); + // + static CodeField<6, 10> VS(FIELD_R_VPR); - // - static CodeField<6, 10> VD(FIELD_R_VPR); + // + static CodeField<6, 10> VD(FIELD_R_VPR); - // - static CodeField<11, 15> VA(FIELD_R_VPR); + // + static CodeField<11, 15> VA(FIELD_R_VPR); - // - static CodeField<16, 20> VB(FIELD_R_VPR); + // + static CodeField<16, 20> VB(FIELD_R_VPR); - // - static CodeField<21, 25> VC(FIELD_R_VPR); + // + static CodeField<21, 25> VC(FIELD_R_VPR); - // - static CodeField<11, 15> VUIMM; + // + static CodeField<11, 15> VUIMM; - // - static CodeFieldSigned<11, 15> VSIMM; + // + static CodeFieldSigned<11, 15> VSIMM; - // - static CodeField<22, 25> VSH; + // + static CodeField<22, 25> VSH; - //This field is used to specify a GPR to be used as a destination - static CodeField<6, 10> RD(FIELD_R_GPR); + //This field is used to specify a GPR to be used as a destination + static CodeField<6, 10> RD(FIELD_R_GPR); - //This field is used to specify a GPR to be used as a source - static CodeField<6, 10> RS(FIELD_R_GPR); + //This field is used to specify a GPR to be used as a source + static CodeField<6, 10> RS(FIELD_R_GPR); - //This field is used to specify a GPR to be used as a source or destination - static CodeField<11, 15> RA(FIELD_R_GPR); + //This field is used to specify a GPR to be used as a source or destination + static CodeField<11, 15> 
RA(FIELD_R_GPR); - //This field is used to specify a GPR to be used as a source - static CodeField<16, 20> RB(FIELD_R_GPR); + //This field is used to specify a GPR to be used as a source + static CodeField<16, 20> RB(FIELD_R_GPR); - //This field is used to specify the number of bytes to move in an immediate string load or store - static CodeField<16, 20> NB; + //This field is used to specify the number of bytes to move in an immediate string load or store + static CodeField<16, 20> NB; - //This field is used to specify one of the CR fields, or one of the FPSCR fields, as a destination - static CodeField<6, 8> CRFD(FIELD_R_CR); + //This field is used to specify one of the CR fields, or one of the FPSCR fields, as a destination + static CodeField<6, 8> CRFD(FIELD_R_CR); - //This field is used to specify one of the CR fields, or one of the FPSCR fields, as a source - static CodeField<11, 13> CRFS(FIELD_R_CR); + //This field is used to specify one of the CR fields, or one of the FPSCR fields, as a source + static CodeField<11, 13> CRFS(FIELD_R_CR); - //This field is used to specify a bit in the CR to be used as a source - static CodeField<11, 15> CRBA(FIELD_R_CR); + //This field is used to specify a bit in the CR to be used as a source + static CodeField<11, 15> CRBA(FIELD_R_CR); - //This field is used to specify a bit in the CR to be used as a source - static CodeField<16, 20> CRBB(FIELD_R_CR); + //This field is used to specify a bit in the CR to be used as a source + static CodeField<16, 20> CRBB(FIELD_R_CR); - //This field is used to specify a bit in the CR, or in the FPSCR, as the destination of the result of an instruction - static CodeField<6, 10> CRBD(FIELD_R_CR); + //This field is used to specify a bit in the CR, or in the FPSCR, as the destination of the result of an instruction + static CodeField<6, 10> CRBD(FIELD_R_CR); - //This field is used to specify options for the branch conditional instructions - static CodeField<6, 10> BO; + //This field is used to 
specify options for the branch conditional instructions + static CodeField<6, 10> BO; - //This field is used to specify a bit in the CR to be used as the condition of a branch conditional instruction - static CodeField<11, 15> BI; + //This field is used to specify a bit in the CR to be used as the condition of a branch conditional instruction + static CodeField<11, 15> BI; - //Immediate field specifying a 14-bit signed two's complement branch displacement that is concatenated on the - //right with '00' and sign-extended to 64 bits. - static CodeFieldSigned<16, 31> BD(FIELD_BRANCH); + //Immediate field specifying a 14-bit signed two's complement branch displacement that is concatenated on the + //right with '00' and sign-extended to 64 bits. + static CodeFieldSigned<16, 31> BD(FIELD_BRANCH); - // - static CodeField<19, 20> BH; + // + static CodeField<19, 20> BH; - // - static CodeField<11, 13> BFA; - - //Field used by the optional data stream variant of the dcbt instruction. - static CodeField<9, 10> TH; + // + static CodeField<11, 13> BFA; - //This field is used to specify the conditions on which to trap - static CodeField<6, 10> TO; + //Field used by the optional data stream variant of the dcbt instruction. + static CodeField<9, 10> TH; - // - static CodeField<21, 25> MB; + //This field is used to specify the conditions on which to trap + static CodeField<6, 10> TO; - // - static CodeField<26, 30> ME; + // + static CodeField<21, 25> MB; - //This field is used to specify a shift amount - static CodeField<16, 20> SH; + // + static CodeField<26, 30> ME; - /* - Absolute address bit. - 0 The immediate field represents an address relative to the current instruction address (CIA). (For more - information on the CIA, see Table 8-3.) 
The effective (logical) address of the branch is either the sum - of the LI field sign-extended to 64 bits and the address of the branch instruction or the sum of the BD - field sign-extended to 64 bits and the address of the branch instruction. - 1 The immediate field represents an absolute address. The effective address (EA) of the branch is the - LI field sign-extended to 64 bits or the BD field sign-extended to 64 bits. - */ - static CodeField<30> AA; + //This field is used to specify a shift amount + static CodeField<16, 20> SH; - static CodeFieldSignedOffset<6, 29, 2> LI(FIELD_BRANCH); - - // - static CodeFieldSignedOffset<6, 29, 2> LL(FIELD_BRANCH); - /* - Link bit. - 0 Does not update the link register (LR). - 1 Updates the LR. If the instruction is a branch instruction, the address of the instruction following the - branch instruction is placed into the LR. - */ - static CodeField<31> LK; - - //This field is used for extended arithmetic to enable setting OV and SO in the XER - static CodeField<21> OE; - - //Field used to specify whether an integer compare instruction is to compare 64-bit numbers or 32-bit numbers - static CodeField<10> L_10; - static CodeField<6> L_6; - static CodeField<9, 10> L_9_10; - static CodeField<11> L_11; - // - static CodeField<16, 19> I; - - // - static CodeField<16, 27> DQ; - - //This field is used to specify an FPR as the destination - static CodeField<6, 10> FRD; - - //This field is used to specify an FPR as a source - static CodeField<6, 10> FRS; - - // - static CodeField<7, 14> FM; - - //This field is used to specify an FPR as a source - static CodeField<11, 15> FRA(FIELD_R_FPR); - - //This field is used to specify an FPR as a source - static CodeField<16, 20> FRB(FIELD_R_FPR); - - //This field is used to specify an FPR as a source - static CodeField<21, 25> FRC(FIELD_R_FPR); - - //This field mask is used to identify the CR fields that are to be updated by the mtcrf instruction. 
- static CodeField<12, 19> CRM; - - // - static CodeField<6, 31> SYS; - - //Immediate field specifying a 16-bit signed two's complement integer that is sign-extended to 64 bits - static CodeFieldSigned<16, 31> D; - - // - static CodeFieldSignedOffset<16, 29, 2> DS; - - //This immediate field is used to specify a 16-bit signed integer - static CodeFieldSigned<16, 31> simm16; - - //This immediate field is used to specify a 16-bit unsigned integer - static CodeField<16, 31> uimm16; - - /* - Record bit. - 0 Does not update the condition register (CR). - 1 Updates the CR to reflect the result of the operation. - For integer instructions, CR bits [0-2] are set to reflect the result as a signed quantity and CR bit [3] - receives a copy of the summary overflow bit, XER[SO]. The result as an unsigned quantity or a bit - string can be deduced from the EQ bit. For floating-point instructions, CR bits [4-7] are set to reflect - floating-point exception, floating-point enabled exception, floating-point invalid operation exception, - and floating-point overflow exception. 
- */ - static CodeField<31> RC; - - //Primary opcode field - static CodeField<0, 5> OPCD; - - static CodeField<26, 31> GD_04; //0x3f - static CodeField<21, 31> GD_04_0;//0x7ff - static CodeField<21, 30> GD_13; //0x3ff - static CodeField<27, 29> GD_1e; //0x7 - static CodeField<21, 30> GD_1f; //0x3ff - static CodeField<30, 31> GD_3a; //0x3 - static CodeField<26, 30> GD_3b; //0x1f - static CodeField<30, 31> GD_3e; //0x3 - static CodeField<26, 30> GD_3f;//0x1f - static CodeField<21, 30> GD_3f_0; //0x3ff - - static CodeField<9, 10> STRM; - - //static auto main_list = new_list(OPCD, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, OPCD)); - static InstrList<1 << CodeField<0, 5>::size, ::PPUOpcodes> main_list_obj(OPCD, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, OPCD)); - static auto main_list = &main_list_obj; - static auto g04_list = new_list(main_list, PPU_opcodes::G_04, GD_04); - static auto g04_0_list = new_list(g04_list, GD_04_0, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, GD_04_0)); - static auto g13_list = new_list(main_list, PPU_opcodes::G_13, GD_13, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, GD_13)); - static auto g1e_list = new_list(main_list, PPU_opcodes::G_1e, GD_1e, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, GD_1e)); - static auto g1f_list = new_list(main_list, PPU_opcodes::G_1f, GD_1f, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, GD_1f)); - static auto g3a_list = new_list(main_list, PPU_opcodes::G_3a, GD_3a, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, GD_3a)); - static auto g3b_list = new_list(main_list, PPU_opcodes::G_3b, GD_3b, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, GD_3b)); - static auto g3e_list = new_list(main_list, PPU_opcodes::G_3e, GD_3e, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, GD_3e)); - static auto g3f_list = new_list(main_list, PPU_opcodes::G_3f, GD_3f); - static auto g3f_0_list = new_list(g3f_list, GD_3f_0, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, GD_3f_0)); - - #define bind_instr(list, name, ...) 
\ - static const auto& name = make_instr(list, #name, &PPUOpcodes::name, ##__VA_ARGS__) - - bind_instr(main_list, TDI, TO, RA, simm16); - bind_instr(main_list, TWI, TO, RA, simm16); - bind_instr(main_list, MULLI, RD, RA, simm16); - bind_instr(main_list, SUBFIC, RD, RA, simm16); - bind_instr(main_list, CMPLI, CRFD, L_10, RA, uimm16); - bind_instr(main_list, CMPI, CRFD, L_10, RA, simm16); - bind_instr(main_list, ADDIC, RD, RA, simm16); - bind_instr(main_list, ADDIC_, RD, RA, simm16); - bind_instr(main_list, ADDI, RD, RA, simm16); - bind_instr(main_list, ADDIS, RD, RA, simm16); - bind_instr(main_list, BC, BO, BI, BD, AA, LK); - bind_instr(main_list, SC, SYS); - bind_instr(main_list, B, LI, AA, LK); - bind_instr(main_list, RLWIMI, RA, RS, SH, MB, ME, RC); - bind_instr(main_list, RLWINM, RA, RS, SH, MB, ME, RC); - bind_instr(main_list, RLWNM, RA, RS, RB, MB, ME, RC); - bind_instr(main_list, ORI, RA, RS, uimm16); - bind_instr(main_list, ORIS, RA, RS, uimm16); - bind_instr(main_list, XORI, RA, RS, uimm16); - bind_instr(main_list, XORIS, RA, RS, uimm16); - bind_instr(main_list, ANDI_, RA, RS, uimm16); - bind_instr(main_list, ANDIS_, RA, RS, uimm16); - bind_instr(main_list, LWZ, RD, RA, D); - bind_instr(main_list, LWZU, RD, RA, D); - bind_instr(main_list, LBZ, RD, RA, D); - bind_instr(main_list, LBZU, RD, RA, D); - bind_instr(main_list, STW, RS, RA, D); - bind_instr(main_list, STWU, RS, RA, D); - bind_instr(main_list, STB, RS, RA, D); - bind_instr(main_list, STBU, RS, RA, D); - bind_instr(main_list, LHZ, RD, RA, D); - bind_instr(main_list, LHZU, RD, RA, D); - bind_instr(main_list, LHA, RD, RA, D); - bind_instr(main_list, LHAU, RD, RA, D); - bind_instr(main_list, STH, RS, RA, D); - bind_instr(main_list, STHU, RS, RA, D); - bind_instr(main_list, LMW, RD, RA, D); - bind_instr(main_list, STMW, RS, RA, D); - bind_instr(main_list, LFS, FRD, RA, D); - bind_instr(main_list, LFSU, FRD, RA, D); - bind_instr(main_list, LFD, FRD, RA, D); - bind_instr(main_list, LFDU, FRD, RA, D); - 
bind_instr(main_list, STFS, FRS, RA, D); - bind_instr(main_list, STFSU, FRS, RA, D); - bind_instr(main_list, STFD, FRS, RA, D); - bind_instr(main_list, STFDU, FRS, RA, D); - - bind_instr(g04_list, VMADDFP, VD, VA, VC, VB); - bind_instr(g04_list, VMHADDSHS, VD, VA, VB, VC); - bind_instr(g04_list, VMHRADDSHS, VD, VA, VB, VC); - bind_instr(g04_list, VMLADDUHM, VD, VA, VB, VC); - bind_instr(g04_list, VMSUMMBM, VD, VA, VB, VC); - bind_instr(g04_list, VMSUMSHM, VD, VA, VB, VC); - bind_instr(g04_list, VMSUMSHS, VD, VA, VB, VC); - bind_instr(g04_list, VMSUMUBM, VD, VA, VB, VC); - bind_instr(g04_list, VMSUMUHM, VD, VA, VB, VC); - bind_instr(g04_list, VMSUMUHS, VD, VA, VB, VC); - bind_instr(g04_list, VNMSUBFP, VD, VA, VC, VB); - bind_instr(g04_list, VPERM, VD, VA, VB, VC); - bind_instr(g04_list, VSEL, VD, VA, VB, VC); - bind_instr(g04_list, VSLDOI, VD, VA, VB, VSH); - - bind_instr(g04_0_list, MFVSCR, VD); - bind_instr(g04_0_list, MTVSCR, VB); - bind_instr(g04_0_list, VADDCUW, VD, VA, VB); - bind_instr(g04_0_list, VADDFP, VD, VA, VB); - bind_instr(g04_0_list, VADDSBS, VD, VA, VB); - bind_instr(g04_0_list, VADDSHS, VD, VA, VB); - bind_instr(g04_0_list, VADDSWS, VD, VA, VB); - bind_instr(g04_0_list, VADDUBM, VD, VA, VB); - bind_instr(g04_0_list, VADDUBS, VD, VA, VB); - bind_instr(g04_0_list, VADDUHM, VD, VA, VB); - bind_instr(g04_0_list, VADDUHS, VD, VA, VB); - bind_instr(g04_0_list, VADDUWM, VD, VA, VB); - bind_instr(g04_0_list, VADDUWS, VD, VA, VB); - bind_instr(g04_0_list, VAND, VD, VA, VB); - bind_instr(g04_0_list, VANDC, VD, VA, VB); - bind_instr(g04_0_list, VAVGSB, VD, VA, VB); - bind_instr(g04_0_list, VAVGSH, VD, VA, VB); - bind_instr(g04_0_list, VAVGSW, VD, VA, VB); - bind_instr(g04_0_list, VAVGUB, VD, VA, VB); - bind_instr(g04_0_list, VAVGUH, VD, VA, VB); - bind_instr(g04_0_list, VAVGUW, VD, VA, VB); - bind_instr(g04_0_list, VCFSX, VD, VUIMM, VB); - bind_instr(g04_0_list, VCFUX, VD, VUIMM, VB); - bind_instr(g04_0_list, VCMPBFP, VD, VA, VB); - bind_instr(g04_0_list, 
VCMPBFP_, VD, VA, VB); - bind_instr(g04_0_list, VCMPEQFP, VD, VA, VB); - bind_instr(g04_0_list, VCMPEQFP_, VD, VA, VB); - bind_instr(g04_0_list, VCMPEQUB, VD, VA, VB); - bind_instr(g04_0_list, VCMPEQUB_, VD, VA, VB); - bind_instr(g04_0_list, VCMPEQUH, VD, VA, VB); - bind_instr(g04_0_list, VCMPEQUH_, VD, VA, VB); - bind_instr(g04_0_list, VCMPEQUW, VD, VA, VB); - bind_instr(g04_0_list, VCMPEQUW_, VD, VA, VB); - bind_instr(g04_0_list, VCMPGEFP, VD, VA, VB); - bind_instr(g04_0_list, VCMPGEFP_, VD, VA, VB); - bind_instr(g04_0_list, VCMPGTFP, VD, VA, VB); - bind_instr(g04_0_list, VCMPGTFP_, VD, VA, VB); - bind_instr(g04_0_list, VCMPGTSB, VD, VA, VB); - bind_instr(g04_0_list, VCMPGTSB_, VD, VA, VB); - bind_instr(g04_0_list, VCMPGTSH, VD, VA, VB); - bind_instr(g04_0_list, VCMPGTSH_, VD, VA, VB); - bind_instr(g04_0_list, VCMPGTSW, VD, VA, VB); - bind_instr(g04_0_list, VCMPGTSW_, VD, VA, VB); - bind_instr(g04_0_list, VCMPGTUB, VD, VA, VB); - bind_instr(g04_0_list, VCMPGTUB_, VD, VA, VB); - bind_instr(g04_0_list, VCMPGTUH, VD, VA, VB); - bind_instr(g04_0_list, VCMPGTUH_, VD, VA, VB); - bind_instr(g04_0_list, VCMPGTUW, VD, VA, VB); - bind_instr(g04_0_list, VCMPGTUW_, VD, VA, VB); - bind_instr(g04_0_list, VCTSXS, VD, VUIMM, VB); - bind_instr(g04_0_list, VCTUXS, VD, VUIMM, VB); - bind_instr(g04_0_list, VEXPTEFP, VD, VB); - bind_instr(g04_0_list, VLOGEFP, VD, VB); - bind_instr(g04_0_list, VMAXFP, VD, VA, VB); - bind_instr(g04_0_list, VMAXSB, VD, VA, VB); - bind_instr(g04_0_list, VMAXSH, VD, VA, VB); - bind_instr(g04_0_list, VMAXSW, VD, VA, VB); - bind_instr(g04_0_list, VMAXUB, VD, VA, VB); - bind_instr(g04_0_list, VMAXUH, VD, VA, VB); - bind_instr(g04_0_list, VMAXUW, VD, VA, VB); - bind_instr(g04_0_list, VMINFP, VD, VA, VB); - bind_instr(g04_0_list, VMINSB, VD, VA, VB); - bind_instr(g04_0_list, VMINSH, VD, VA, VB); - bind_instr(g04_0_list, VMINSW, VD, VA, VB); - bind_instr(g04_0_list, VMINUB, VD, VA, VB); - bind_instr(g04_0_list, VMINUH, VD, VA, VB); - bind_instr(g04_0_list, 
VMINUW, VD, VA, VB); - bind_instr(g04_0_list, VMRGHB, VD, VA, VB); - bind_instr(g04_0_list, VMRGHH, VD, VA, VB); - bind_instr(g04_0_list, VMRGHW, VD, VA, VB); - bind_instr(g04_0_list, VMRGLB, VD, VA, VB); - bind_instr(g04_0_list, VMRGLH, VD, VA, VB); - bind_instr(g04_0_list, VMRGLW, VD, VA, VB); - bind_instr(g04_0_list, VMULESB, VD, VA, VB); - bind_instr(g04_0_list, VMULESH, VD, VA, VB); - bind_instr(g04_0_list, VMULEUB, VD, VA, VB); - bind_instr(g04_0_list, VMULEUH, VD, VA, VB); - bind_instr(g04_0_list, VMULOSB, VD, VA, VB); - bind_instr(g04_0_list, VMULOSH, VD, VA, VB); - bind_instr(g04_0_list, VMULOUB, VD, VA, VB); - bind_instr(g04_0_list, VMULOUH, VD, VA, VB); - bind_instr(g04_0_list, VNOR, VD, VA, VB); - bind_instr(g04_0_list, VOR, VD, VA, VB); - bind_instr(g04_0_list, VPKPX, VD, VA, VB); - bind_instr(g04_0_list, VPKSHSS, VD, VA, VB); - bind_instr(g04_0_list, VPKSHUS, VD, VA, VB); - bind_instr(g04_0_list, VPKSWSS, VD, VA, VB); - bind_instr(g04_0_list, VPKSWUS, VD, VA, VB); - bind_instr(g04_0_list, VPKUHUM, VD, VA, VB); - bind_instr(g04_0_list, VPKUHUS, VD, VA, VB); - bind_instr(g04_0_list, VPKUWUM, VD, VA, VB); - bind_instr(g04_0_list, VPKUWUS, VD, VA, VB); - bind_instr(g04_0_list, VREFP, VD, VB); - bind_instr(g04_0_list, VRFIM, VD, VB); - bind_instr(g04_0_list, VRFIN, VD, VB); - bind_instr(g04_0_list, VRFIP, VD, VB); - bind_instr(g04_0_list, VRFIZ, VD, VB); - bind_instr(g04_0_list, VRLB, VD, VA, VB); - bind_instr(g04_0_list, VRLH, VD, VA, VB); - bind_instr(g04_0_list, VRLW, VD, VA, VB); - bind_instr(g04_0_list, VRSQRTEFP, VD, VB); - bind_instr(g04_0_list, VSL, VD, VA, VB); - bind_instr(g04_0_list, VSLB, VD, VA, VB); - bind_instr(g04_0_list, VSLH, VD, VA, VB); - bind_instr(g04_0_list, VSLO, VD, VA, VB); - bind_instr(g04_0_list, VSLW, VD, VA, VB); - bind_instr(g04_0_list, VSPLTB, VD, VUIMM, VB); - bind_instr(g04_0_list, VSPLTH, VD, VUIMM, VB); - bind_instr(g04_0_list, VSPLTISB, VD, VSIMM); - bind_instr(g04_0_list, VSPLTISH, VD, VSIMM); - bind_instr(g04_0_list, 
VSPLTISW, VD, VSIMM); - bind_instr(g04_0_list, VSPLTW, VD, VUIMM, VB); - bind_instr(g04_0_list, VSR, VD, VA, VB); - bind_instr(g04_0_list, VSRAB, VD, VA, VB); - bind_instr(g04_0_list, VSRAH, VD, VA, VB); - bind_instr(g04_0_list, VSRAW, VD, VA, VB); - bind_instr(g04_0_list, VSRB, VD, VA, VB); - bind_instr(g04_0_list, VSRH, VD, VA, VB); - bind_instr(g04_0_list, VSRO, VD, VA, VB); - bind_instr(g04_0_list, VSRW, VD, VA, VB); - bind_instr(g04_0_list, VSUBCUW, VD, VA, VB); - bind_instr(g04_0_list, VSUBFP, VD, VA, VB); - bind_instr(g04_0_list, VSUBSBS, VD, VA, VB); - bind_instr(g04_0_list, VSUBSHS, VD, VA, VB); - bind_instr(g04_0_list, VSUBSWS, VD, VA, VB); - bind_instr(g04_0_list, VSUBUBM, VD, VA, VB); - bind_instr(g04_0_list, VSUBUBS, VD, VA, VB); - bind_instr(g04_0_list, VSUBUHM, VD, VA, VB); - bind_instr(g04_0_list, VSUBUHS, VD, VA, VB); - bind_instr(g04_0_list, VSUBUWM, VD, VA, VB); - bind_instr(g04_0_list, VSUBUWS, VD, VA, VB); - bind_instr(g04_0_list, VSUMSWS, VD, VA, VB); - bind_instr(g04_0_list, VSUM2SWS, VD, VA, VB); - bind_instr(g04_0_list, VSUM4SBS, VD, VA, VB); - bind_instr(g04_0_list, VSUM4SHS, VD, VA, VB); - bind_instr(g04_0_list, VSUM4UBS, VD, VA, VB); - bind_instr(g04_0_list, VUPKHPX, VD, VB); - bind_instr(g04_0_list, VUPKHSB, VD, VB); - bind_instr(g04_0_list, VUPKHSH, VD, VB); - bind_instr(g04_0_list, VUPKLPX, VD, VB); - bind_instr(g04_0_list, VUPKLSB, VD, VB); - bind_instr(g04_0_list, VUPKLSH, VD, VB); - bind_instr(g04_0_list, VXOR, VD, VA, VB); - - bind_instr(g13_list, MCRF, CRFD, CRFS); - bind_instr(g13_list, BCLR, BO, BI, BH, LK); - bind_instr(g13_list, CRNOR, CRBD, CRBA, CRBB); - bind_instr(g13_list, CRANDC, CRBD, CRBA, CRBB); - bind_instr(g13_list, ISYNC); - bind_instr(g13_list, CRXOR, CRBD, CRBA, CRBB); - bind_instr(g13_list, CRNAND, CRBD, CRBA, CRBB); - bind_instr(g13_list, CRAND, CRBD, CRBA, CRBB); - bind_instr(g13_list, CREQV, CRBD, CRBA, CRBB); - bind_instr(g13_list, CRORC, CRBD, CRBA, CRBB); - bind_instr(g13_list, CROR, CRBD, CRBA, CRBB); - 
bind_instr(g13_list, BCCTR, BO, BI, BH, LK); - - bind_instr(g1e_list, RLDICL, RA, RS, sh, mb, RC); - bind_instr(g1e_list, RLDICR, RA, RS, sh, me, RC); - bind_instr(g1e_list, RLDIC, RA, RS, sh, mb, RC); - bind_instr(g1e_list, RLDIMI, RA, RS, sh, mb, RC); - bind_instr(g1e_list, RLDC_LR, RA, RS, RB, mb, AA, RC); - - /*0x000*/bind_instr(g1f_list, CMP, CRFD, L_10, RA, RB); - /*0x004*/bind_instr(g1f_list, TW, TO, RA, RB); - /*0x006*/bind_instr(g1f_list, LVSL, VD, RA, RB); - /*0x007*/bind_instr(g1f_list, LVEBX, VD, RA, RB); - /*0x008*/bind_instr(g1f_list, SUBFC, RD, RA, RB, OE, RC); - /*0x009*/bind_instr(g1f_list, MULHDU, RD, RA, RB, RC); - /*0x00a*/bind_instr(g1f_list, ADDC, RD, RA, RB, OE, RC); - /*0x00b*/bind_instr(g1f_list, MULHWU, RD, RA, RB, RC); - /*0x013*/bind_instr(g1f_list, MFOCRF, L_11, RD, CRM); - /*0x014*/bind_instr(g1f_list, LWARX, RD, RA, RB); - /*0x015*/bind_instr(g1f_list, LDX, RD, RA, RB); - /*0x017*/bind_instr(g1f_list, LWZX, RD, RA, RB); - /*0x018*/bind_instr(g1f_list, SLW, RA, RS, RB, RC); - /*0x01a*/bind_instr(g1f_list, CNTLZW, RA, RS, RC); - /*0x01b*/bind_instr(g1f_list, SLD, RA, RS, RB, RC); - /*0x01c*/bind_instr(g1f_list, AND, RA, RS, RB, RC); - /*0x020*/bind_instr(g1f_list, CMPL, CRFD, L_10, RA, RB); - /*0x026*/bind_instr(g1f_list, LVSR, VD, RA, RB); - /*0x027*/bind_instr(g1f_list, LVEHX, VD, RA, RB); - /*0x028*/bind_instr(g1f_list, SUBF, RD, RA, RB, OE, RC); - /*0x035*/bind_instr(g1f_list, LDUX, RD, RA, RB); - /*0x036*/bind_instr(g1f_list, DCBST, RA, RB); - /*0x037*/bind_instr(g1f_list, LWZUX, RD, RA, RB); - /*0x03a*/bind_instr(g1f_list, CNTLZD, RA, RS, RC); - /*0x03c*/bind_instr(g1f_list, ANDC, RA, RS, RB, RC); - /*0x03c*/bind_instr(g1f_list, TD, TO, RA, RB); - /*0x047*/bind_instr(g1f_list, LVEWX, VD, RA, RB); - /*0x049*/bind_instr(g1f_list, MULHD, RD, RA, RB, RC); - /*0x04b*/bind_instr(g1f_list, MULHW, RD, RA, RB, RC); - /*0x054*/bind_instr(g1f_list, LDARX, RD, RA, RB); - /*0x056*/bind_instr(g1f_list, DCBF, RA, RB); - 
/*0x057*/bind_instr(g1f_list, LBZX, RD, RA, RB); - /*0x067*/bind_instr(g1f_list, LVX, VD, RA, RB); - /*0x068*/bind_instr(g1f_list, NEG, RD, RA, OE, RC); - /*0x077*/bind_instr(g1f_list, LBZUX, RD, RA, RB); - /*0x07c*/bind_instr(g1f_list, NOR, RA, RS, RB, RC); - /*0x087*/bind_instr(g1f_list, STVEBX, VS, RA, RB); - /*0x088*/bind_instr(g1f_list, SUBFE, RD, RA, RB, OE, RC); - /*0x08a*/bind_instr(g1f_list, ADDE, RD, RA, RB, OE, RC); - /*0x090*/bind_instr(g1f_list, MTOCRF, L_11, CRM, RS); - /*0x095*/bind_instr(g1f_list, STDX, RS, RA, RB); - /*0x096*/bind_instr(g1f_list, STWCX_, RS, RA, RB); - /*0x097*/bind_instr(g1f_list, STWX, RS, RA, RB); - /*0x0a7*/bind_instr(g1f_list, STVEHX, VS, RA, RB); - /*0x0b5*/bind_instr(g1f_list, STDUX, RS, RA, RB); - /*0x0b7*/bind_instr(g1f_list, STWUX, RS, RA, RB); - /*0x0c7*/bind_instr(g1f_list, STVEWX, VS, RA, RB); - /*0x0c8*/bind_instr(g1f_list, SUBFZE, RD, RA, OE, RC); - /*0x0ca*/bind_instr(g1f_list, ADDZE, RD, RA, OE, RC); - /*0x0d6*/bind_instr(g1f_list, STDCX_, RS, RA, RB); - /*0x0d7*/bind_instr(g1f_list, STBX, RS, RA, RB); - /*0x0e7*/bind_instr(g1f_list, STVX, VS, RA, RB); - /*0x0e8*/bind_instr(g1f_list, SUBFME, RD, RA, OE, RC); - /*0x0e9*/bind_instr(g1f_list, MULLD, RD, RA, RB, OE, RC); - /*0x0ea*/bind_instr(g1f_list, ADDME, RD, RA, OE, RC); - /*0x0eb*/bind_instr(g1f_list, MULLW, RD, RA, RB, OE, RC); - /*0x0f6*/bind_instr(g1f_list, DCBTST, RA, RB, TH); - /*0x0f7*/bind_instr(g1f_list, STBUX, RS, RA, RB); - /*0x10a*/bind_instr(g1f_list, ADD, RD, RA, RB, OE, RC); - /*0x116*/bind_instr(g1f_list, DCBT, RA, RB, TH); - /*0x117*/bind_instr(g1f_list, LHZX, RD, RA, RB); - /*0x11c*/bind_instr(g1f_list, EQV, RA, RS, RB, RC); - /*0x136*/bind_instr(g1f_list, ECIWX, RD, RA, RB); - /*0x137*/bind_instr(g1f_list, LHZUX, RD, RA, RB); - /*0x13c*/bind_instr(g1f_list, XOR, RA, RS, RB, RC); - /*0x153*/bind_instr(g1f_list, MFSPR, RD, SPR); - /*0x155*/bind_instr(g1f_list, LWAX, RD, RA, RB); - /*0x156*/bind_instr(g1f_list, DST, RA, RB, STRM, L_6); - 
/*0x157*/bind_instr(g1f_list, LHAX, RD, RA, RB); - /*0x167*/bind_instr(g1f_list, LVXL, VD, RA, RB); - /*0x173*/bind_instr(g1f_list, MFTB, RD, SPR); - /*0x175*/bind_instr(g1f_list, LWAUX, RD, RA, RB); - /*0x176*/bind_instr(g1f_list, DSTST, RA, RB, STRM, L_6); - /*0x177*/bind_instr(g1f_list, LHAUX, RD, RA, RB); - /*0x197*/bind_instr(g1f_list, STHX, RS, RA, RB); - /*0x19c*/bind_instr(g1f_list, ORC, RA, RS, RB, RC); - /*0x1b6*/bind_instr(g1f_list, ECOWX, RS, RA, RB); - /*0x1b7*/bind_instr(g1f_list, STHUX, RS, RA, RB); - /*0x1bc*/bind_instr(g1f_list, OR, RA, RS, RB, RC); - /*0x1c9*/bind_instr(g1f_list, DIVDU, RD, RA, RB, OE, RC); - /*0x1cb*/bind_instr(g1f_list, DIVWU, RD, RA, RB, OE, RC); - /*0x1d3*/bind_instr(g1f_list, MTSPR, SPR, RS); - /*0x1d6*/bind_instr(g1f_list, DCBI, RA, RB); - /*0x1dc*/bind_instr(g1f_list, NAND, RA, RS, RB, RC); - /*0x1e7*/bind_instr(g1f_list, STVXL, VS, RA, RB); - /*0x1e9*/bind_instr(g1f_list, DIVD, RD, RA, RB, OE, RC); - /*0x1eb*/bind_instr(g1f_list, DIVW, RD, RA, RB, OE, RC); - /*0x207*/bind_instr(g1f_list, LVLX, VD, RA, RB); - /*0x214*/bind_instr(g1f_list, LDBRX, RD, RA, RB); - /*0x215*/bind_instr(g1f_list, LSWX, RD, RA, RB); - /*0x216*/bind_instr(g1f_list, LWBRX, RD, RA, RB); - /*0x217*/bind_instr(g1f_list, LFSX, FRD, RA, RB); - /*0x218*/bind_instr(g1f_list, SRW, RA, RS, RB, RC); - /*0x21b*/bind_instr(g1f_list, SRD, RA, RS, RB, RC); - /*0x227*/bind_instr(g1f_list, LVRX, VD, RA, RB); - /*0x237*/bind_instr(g1f_list, LFSUX, FRD, RA, RB); - /*0x255*/bind_instr(g1f_list, LSWI, RD, RA, NB); - /*0x256*/bind_instr(g1f_list, SYNC, L_9_10); - /*0x257*/bind_instr(g1f_list, LFDX, FRD, RA, RB); - /*0x277*/bind_instr(g1f_list, LFDUX, FRD, RA, RB); - /*0x287*/bind_instr(g1f_list, STVLX, VS, RA, RB); - /*0x296*/bind_instr(g1f_list, STSWX, RS, RA, RB); - /*0x296*/bind_instr(g1f_list, STWBRX, RS, RA, RB); - /*0x297*/bind_instr(g1f_list, STFSX, FRS, RA, RB); - /*0x2a7*/bind_instr(g1f_list, STVRX, VS, RA, RB); - /*0x2b7*/bind_instr(g1f_list, STFSUX, FRS, RA, 
RB); - /*0x2d5*/bind_instr(g1f_list, STSWI, RS, RA, NB); - /*0x2d7*/bind_instr(g1f_list, STFDX, FRS, RA, RB); - /*0x2d7*/bind_instr(g1f_list, STFDUX, FRS, RA, RB); - /*0x307*/bind_instr(g1f_list, LVLXL, VD, RA, RB); - /*0x316*/bind_instr(g1f_list, LHBRX, RD, RA, RB); - /*0x318*/bind_instr(g1f_list, SRAW, RA, RS, RB, RC); - /*0x31a*/bind_instr(g1f_list, SRAD, RA, RS, RB, RC); - /*0x327*/bind_instr(g1f_list, LVRXL, VD, RA, RB); - /*0x336*/bind_instr(g1f_list, DSS, STRM, L_6); - /*0x338*/bind_instr(g1f_list, SRAWI, RA, RS, SH, RC); - /*0x33a*/bind_instr(g1f_list, SRADI1, RA, RS, sh, RC); - /*0x33b*/bind_instr(g1f_list, SRADI2, RA, RS, sh, RC); - /*0x356*/bind_instr(g1f_list, EIEIO); - /*0x387*/bind_instr(g1f_list, STVLXL, VS, RA, RB); - /*0x396*/bind_instr(g1f_list, STHBRX, RS, RA, RB); - /*0x39a*/bind_instr(g1f_list, EXTSH, RA, RS, RC); - /*0x387*/bind_instr(g1f_list, STVRXL, VS, RA, RB); - /*0x3ba*/bind_instr(g1f_list, EXTSB, RA, RS, RC); - /*0x3d7*/bind_instr(g1f_list, STFIWX, FRS, RA, RB); - /*0x3da*/bind_instr(g1f_list, EXTSW, RA, RS, RC); - /*0x3d6*/bind_instr(g1f_list, ICBI, RA, RB); - /*0x3f6*/bind_instr(g1f_list, DCBZ, RA, RB); - - bind_instr(g3a_list, LD, RD, RA, DS); - bind_instr(g3a_list, LDU, RD, RA, DS); - bind_instr(g3a_list, LWA, RD, RA, DS); - - bind_instr(g3b_list, FDIVS, FRD, FRA, FRB, RC); - bind_instr(g3b_list, FSUBS, FRD, FRA, FRB, RC); - bind_instr(g3b_list, FADDS, FRD, FRA, FRB, RC); - bind_instr(g3b_list, FSQRTS, FRD, FRB, RC); - bind_instr(g3b_list, FRES, FRD, FRB, RC); - bind_instr(g3b_list, FMULS, FRD, FRA, FRC, RC); - bind_instr(g3b_list, FMADDS, FRD, FRA, FRC, FRB, RC); - bind_instr(g3b_list, FMSUBS, FRD, FRA, FRC, FRB, RC); - bind_instr(g3b_list, FNMSUBS, FRD, FRA, FRC, FRB, RC); - bind_instr(g3b_list, FNMADDS, FRD, FRA, FRC, FRB, RC); - - bind_instr(g3e_list, STD, RS, RA, DS); - bind_instr(g3e_list, STDU, RS, RA, DS); - - bind_instr(g3f_list, FSEL, FRD, FRA, FRC, FRB, RC); - bind_instr(g3f_list, FMUL, FRD, FRA, FRC, RC); - 
bind_instr(g3f_list, FMSUB, FRD, FRA, FRC, FRB, RC); - bind_instr(g3f_list, FMADD, FRD, FRA, FRC, FRB, RC); - bind_instr(g3f_list, FNMSUB, FRD, FRA, FRC, FRB, RC); - bind_instr(g3f_list, FNMADD, FRD, FRA, FRC, FRB, RC); - - bind_instr(g3f_0_list, FDIV, FRD, FRA, FRB, RC); - bind_instr(g3f_0_list, FSUB, FRD, FRA, FRB, RC); - bind_instr(g3f_0_list, FADD, FRD, FRA, FRB, RC); - bind_instr(g3f_0_list, FSQRT, FRD, FRB, RC); - bind_instr(g3f_0_list, FRSQRTE, FRD, FRB, RC); - bind_instr(g3f_0_list, FCMPU, CRFD, FRA, FRB); - bind_instr(g3f_0_list, FRSP, FRD, FRB, RC); - bind_instr(g3f_0_list, FCTIW, FRD, FRB, RC); - bind_instr(g3f_0_list, FCTIWZ, FRD, FRB, RC); - bind_instr(g3f_0_list, FCMPO, CRFD, FRA, FRB); - bind_instr(g3f_0_list, FNEG, FRD, FRB, RC); - bind_instr(g3f_0_list, FMR, FRD, FRB, RC); - bind_instr(g3f_0_list, FNABS, FRD, FRB, RC); - bind_instr(g3f_0_list, FABS, FRD, FRB, RC); - bind_instr(g3f_0_list, FCFID, FRD, FRB, RC); - bind_instr(g3f_0_list, FCTID, FRD, FRB, RC); - bind_instr(g3f_0_list, FCTIDZ, FRD, FRB, RC); - - bind_instr(g3f_0_list, MTFSB1, CRBD, RC); - bind_instr(g3f_0_list, MCRFS, CRFD, CRFS); - bind_instr(g3f_0_list, MTFSB0, CRBD, RC); - bind_instr(g3f_0_list, MTFSFI, CRFD, I, RC); - bind_instr(g3f_0_list, MFFS, FRD, RC); - bind_instr(g3f_0_list, MTFSF, FM, FRB, RC); - - static auto LIS = std::bind(ADDIS, std::placeholders::_1, 0, std::placeholders::_2); - static auto NOP = std::bind(ORI, 0, 0, 0); - static auto BLR = std::bind(BCLR, 0x10 | 0x04, 0, 0, 0); + /* + Absolute address bit. + 0 The immediate field represents an address relative to the current instruction address (CIA). (For more + information on the CIA, see Table 8-3.) The effective (logical) address of the branch is either the sum + of the LI field sign-extended to 64 bits and the address of the branch instruction or the sum of the BD + field sign-extended to 64 bits and the address of the branch instruction. + 1 The immediate field represents an absolute address. 
The effective address (EA) of the branch is the + LI field sign-extended to 64 bits or the BD field sign-extended to 64 bits. + */ + static CodeField<30> AA; + static CodeFieldSignedOffset<6, 29, 2> LI(FIELD_BRANCH); + + // + static CodeFieldSignedOffset<6, 29, 2> LL(FIELD_BRANCH); + /* + Link bit. + 0 Does not update the link register (LR). + 1 Updates the LR. If the instruction is a branch instruction, the address of the instruction following the + branch instruction is placed into the LR. + */ + static CodeField<31> LK; + + //This field is used for extended arithmetic to enable setting OV and SO in the XER + static CodeField<21> OE; + + //Field used to specify whether an integer compare instruction is to compare 64-bit numbers or 32-bit numbers + static CodeField<10> L_10; + static CodeField<6> L_6; + static CodeField<9, 10> L_9_10; + static CodeField<11> L_11; + // + static CodeField<16, 19> I; + + // + static CodeField<16, 27> DQ; + + //This field is used to specify an FPR as the destination + static CodeField<6, 10> FRD; + + //This field is used to specify an FPR as a source + static CodeField<6, 10> FRS; + + // + static CodeField<7, 14> FM; + + //This field is used to specify an FPR as a source + static CodeField<11, 15> FRA(FIELD_R_FPR); + + //This field is used to specify an FPR as a source + static CodeField<16, 20> FRB(FIELD_R_FPR); + + //This field is used to specify an FPR as a source + static CodeField<21, 25> FRC(FIELD_R_FPR); + + //This field mask is used to identify the CR fields that are to be updated by the mtcrf instruction. 
+ static CodeField<12, 19> CRM; + + // + static CodeField<6, 31> SYS; + + //Immediate field specifying a 16-bit signed two's complement integer that is sign-extended to 64 bits + static CodeFieldSigned<16, 31> D; + + // + static CodeFieldSignedOffset<16, 29, 2> DS; + + //This immediate field is used to specify a 16-bit signed integer + static CodeFieldSigned<16, 31> simm16; + + //This immediate field is used to specify a 16-bit unsigned integer + static CodeField<16, 31> uimm16; + + /* + Record bit. + 0 Does not update the condition register (CR). + 1 Updates the CR to reflect the result of the operation. + For integer instructions, CR bits [0-2] are set to reflect the result as a signed quantity and CR bit [3] + receives a copy of the summary overflow bit, XER[SO]. The result as an unsigned quantity or a bit + string can be deduced from the EQ bit. For floating-point instructions, CR bits [4-7] are set to reflect + floating-point exception, floating-point enabled exception, floating-point invalid operation exception, + and floating-point overflow exception. 
+ */ + static CodeField<31> RC; + + //Primary opcode field + static CodeField<0, 5> OPCD; + + static CodeField<26, 31> GD_04; //0x3f + static CodeField<21, 31> GD_04_0;//0x7ff + static CodeField<21, 30> GD_13; //0x3ff + static CodeField<27, 29> GD_1e; //0x7 + static CodeField<21, 30> GD_1f; //0x3ff + static CodeField<30, 31> GD_3a; //0x3 + static CodeField<26, 30> GD_3b; //0x1f + static CodeField<30, 31> GD_3e; //0x3 + static CodeField<26, 30> GD_3f;//0x1f + static CodeField<21, 30> GD_3f_0; //0x3ff + + static CodeField<9, 10> STRM; + } + + namespace lists + { + using namespace fields; + + //static auto main_list = new_list(OPCD, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, OPCD)); + static InstrList<1 << CodeField<0, 5>::size, ::PPUOpcodes> main_list_obj(OPCD, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, OPCD)); + static auto main_list = &main_list_obj; + static auto g04_list = new_list(main_list, PPU_opcodes::G_04, GD_04); + static auto g04_0_list = new_list(g04_list, GD_04_0, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, GD_04_0)); + static auto g13_list = new_list(main_list, PPU_opcodes::G_13, GD_13, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, GD_13)); + static auto g1e_list = new_list(main_list, PPU_opcodes::G_1e, GD_1e, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, GD_1e)); + static auto g1f_list = new_list(main_list, PPU_opcodes::G_1f, GD_1f, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, GD_1f)); + static auto g3a_list = new_list(main_list, PPU_opcodes::G_3a, GD_3a, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, GD_3a)); + static auto g3b_list = new_list(main_list, PPU_opcodes::G_3b, GD_3b, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, GD_3b)); + static auto g3e_list = new_list(main_list, PPU_opcodes::G_3e, GD_3e, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, GD_3e)); + static auto g3f_list = new_list(main_list, PPU_opcodes::G_3f, GD_3f); + static auto g3f_0_list = new_list(g3f_list, GD_3f_0, instr_bind(&PPUOpcodes::UNK, GetCode, OPCD, GD_3f_0)); + +#define 
bind_instr(list, name, ...) \ + static const auto& name = make_instr(list, #name, &PPUOpcodes::name, ##__VA_ARGS__) + + bind_instr(main_list, TDI, TO, RA, simm16); + bind_instr(main_list, TWI, TO, RA, simm16); + bind_instr(main_list, MULLI, RD, RA, simm16); + bind_instr(main_list, SUBFIC, RD, RA, simm16); + bind_instr(main_list, CMPLI, CRFD, L_10, RA, uimm16); + bind_instr(main_list, CMPI, CRFD, L_10, RA, simm16); + bind_instr(main_list, ADDIC, RD, RA, simm16); + bind_instr(main_list, ADDIC_, RD, RA, simm16); + bind_instr(main_list, ADDI, RD, RA, simm16); + bind_instr(main_list, ADDIS, RD, RA, simm16); + bind_instr(main_list, BC, BO, BI, BD, AA, LK); + bind_instr(main_list, SC, SYS); + bind_instr(main_list, B, LI, AA, LK); + bind_instr(main_list, RLWIMI, RA, RS, SH, MB, ME, RC); + bind_instr(main_list, RLWINM, RA, RS, SH, MB, ME, RC); + bind_instr(main_list, RLWNM, RA, RS, RB, MB, ME, RC); + bind_instr(main_list, ORI, RA, RS, uimm16); + bind_instr(main_list, ORIS, RA, RS, uimm16); + bind_instr(main_list, XORI, RA, RS, uimm16); + bind_instr(main_list, XORIS, RA, RS, uimm16); + bind_instr(main_list, ANDI_, RA, RS, uimm16); + bind_instr(main_list, ANDIS_, RA, RS, uimm16); + bind_instr(main_list, LWZ, RD, RA, D); + bind_instr(main_list, LWZU, RD, RA, D); + bind_instr(main_list, LBZ, RD, RA, D); + bind_instr(main_list, LBZU, RD, RA, D); + bind_instr(main_list, STW, RS, RA, D); + bind_instr(main_list, STWU, RS, RA, D); + bind_instr(main_list, STB, RS, RA, D); + bind_instr(main_list, STBU, RS, RA, D); + bind_instr(main_list, LHZ, RD, RA, D); + bind_instr(main_list, LHZU, RD, RA, D); + bind_instr(main_list, LHA, RD, RA, D); + bind_instr(main_list, LHAU, RD, RA, D); + bind_instr(main_list, STH, RS, RA, D); + bind_instr(main_list, STHU, RS, RA, D); + bind_instr(main_list, LMW, RD, RA, D); + bind_instr(main_list, STMW, RS, RA, D); + bind_instr(main_list, LFS, FRD, RA, D); + bind_instr(main_list, LFSU, FRD, RA, D); + bind_instr(main_list, LFD, FRD, RA, D); + 
bind_instr(main_list, LFDU, FRD, RA, D); + bind_instr(main_list, STFS, FRS, RA, D); + bind_instr(main_list, STFSU, FRS, RA, D); + bind_instr(main_list, STFD, FRS, RA, D); + bind_instr(main_list, STFDU, FRS, RA, D); + + bind_instr(g04_list, VMADDFP, VD, VA, VC, VB); + bind_instr(g04_list, VMHADDSHS, VD, VA, VB, VC); + bind_instr(g04_list, VMHRADDSHS, VD, VA, VB, VC); + bind_instr(g04_list, VMLADDUHM, VD, VA, VB, VC); + bind_instr(g04_list, VMSUMMBM, VD, VA, VB, VC); + bind_instr(g04_list, VMSUMSHM, VD, VA, VB, VC); + bind_instr(g04_list, VMSUMSHS, VD, VA, VB, VC); + bind_instr(g04_list, VMSUMUBM, VD, VA, VB, VC); + bind_instr(g04_list, VMSUMUHM, VD, VA, VB, VC); + bind_instr(g04_list, VMSUMUHS, VD, VA, VB, VC); + bind_instr(g04_list, VNMSUBFP, VD, VA, VC, VB); + bind_instr(g04_list, VPERM, VD, VA, VB, VC); + bind_instr(g04_list, VSEL, VD, VA, VB, VC); + bind_instr(g04_list, VSLDOI, VD, VA, VB, VSH); + + bind_instr(g04_0_list, MFVSCR, VD); + bind_instr(g04_0_list, MTVSCR, VB); + bind_instr(g04_0_list, VADDCUW, VD, VA, VB); + bind_instr(g04_0_list, VADDFP, VD, VA, VB); + bind_instr(g04_0_list, VADDSBS, VD, VA, VB); + bind_instr(g04_0_list, VADDSHS, VD, VA, VB); + bind_instr(g04_0_list, VADDSWS, VD, VA, VB); + bind_instr(g04_0_list, VADDUBM, VD, VA, VB); + bind_instr(g04_0_list, VADDUBS, VD, VA, VB); + bind_instr(g04_0_list, VADDUHM, VD, VA, VB); + bind_instr(g04_0_list, VADDUHS, VD, VA, VB); + bind_instr(g04_0_list, VADDUWM, VD, VA, VB); + bind_instr(g04_0_list, VADDUWS, VD, VA, VB); + bind_instr(g04_0_list, VAND, VD, VA, VB); + bind_instr(g04_0_list, VANDC, VD, VA, VB); + bind_instr(g04_0_list, VAVGSB, VD, VA, VB); + bind_instr(g04_0_list, VAVGSH, VD, VA, VB); + bind_instr(g04_0_list, VAVGSW, VD, VA, VB); + bind_instr(g04_0_list, VAVGUB, VD, VA, VB); + bind_instr(g04_0_list, VAVGUH, VD, VA, VB); + bind_instr(g04_0_list, VAVGUW, VD, VA, VB); + bind_instr(g04_0_list, VCFSX, VD, VUIMM, VB); + bind_instr(g04_0_list, VCFUX, VD, VUIMM, VB); + bind_instr(g04_0_list, 
VCMPBFP, VD, VA, VB); + bind_instr(g04_0_list, VCMPBFP_, VD, VA, VB); + bind_instr(g04_0_list, VCMPEQFP, VD, VA, VB); + bind_instr(g04_0_list, VCMPEQFP_, VD, VA, VB); + bind_instr(g04_0_list, VCMPEQUB, VD, VA, VB); + bind_instr(g04_0_list, VCMPEQUB_, VD, VA, VB); + bind_instr(g04_0_list, VCMPEQUH, VD, VA, VB); + bind_instr(g04_0_list, VCMPEQUH_, VD, VA, VB); + bind_instr(g04_0_list, VCMPEQUW, VD, VA, VB); + bind_instr(g04_0_list, VCMPEQUW_, VD, VA, VB); + bind_instr(g04_0_list, VCMPGEFP, VD, VA, VB); + bind_instr(g04_0_list, VCMPGEFP_, VD, VA, VB); + bind_instr(g04_0_list, VCMPGTFP, VD, VA, VB); + bind_instr(g04_0_list, VCMPGTFP_, VD, VA, VB); + bind_instr(g04_0_list, VCMPGTSB, VD, VA, VB); + bind_instr(g04_0_list, VCMPGTSB_, VD, VA, VB); + bind_instr(g04_0_list, VCMPGTSH, VD, VA, VB); + bind_instr(g04_0_list, VCMPGTSH_, VD, VA, VB); + bind_instr(g04_0_list, VCMPGTSW, VD, VA, VB); + bind_instr(g04_0_list, VCMPGTSW_, VD, VA, VB); + bind_instr(g04_0_list, VCMPGTUB, VD, VA, VB); + bind_instr(g04_0_list, VCMPGTUB_, VD, VA, VB); + bind_instr(g04_0_list, VCMPGTUH, VD, VA, VB); + bind_instr(g04_0_list, VCMPGTUH_, VD, VA, VB); + bind_instr(g04_0_list, VCMPGTUW, VD, VA, VB); + bind_instr(g04_0_list, VCMPGTUW_, VD, VA, VB); + bind_instr(g04_0_list, VCTSXS, VD, VUIMM, VB); + bind_instr(g04_0_list, VCTUXS, VD, VUIMM, VB); + bind_instr(g04_0_list, VEXPTEFP, VD, VB); + bind_instr(g04_0_list, VLOGEFP, VD, VB); + bind_instr(g04_0_list, VMAXFP, VD, VA, VB); + bind_instr(g04_0_list, VMAXSB, VD, VA, VB); + bind_instr(g04_0_list, VMAXSH, VD, VA, VB); + bind_instr(g04_0_list, VMAXSW, VD, VA, VB); + bind_instr(g04_0_list, VMAXUB, VD, VA, VB); + bind_instr(g04_0_list, VMAXUH, VD, VA, VB); + bind_instr(g04_0_list, VMAXUW, VD, VA, VB); + bind_instr(g04_0_list, VMINFP, VD, VA, VB); + bind_instr(g04_0_list, VMINSB, VD, VA, VB); + bind_instr(g04_0_list, VMINSH, VD, VA, VB); + bind_instr(g04_0_list, VMINSW, VD, VA, VB); + bind_instr(g04_0_list, VMINUB, VD, VA, VB); + bind_instr(g04_0_list, 
VMINUH, VD, VA, VB); + bind_instr(g04_0_list, VMINUW, VD, VA, VB); + bind_instr(g04_0_list, VMRGHB, VD, VA, VB); + bind_instr(g04_0_list, VMRGHH, VD, VA, VB); + bind_instr(g04_0_list, VMRGHW, VD, VA, VB); + bind_instr(g04_0_list, VMRGLB, VD, VA, VB); + bind_instr(g04_0_list, VMRGLH, VD, VA, VB); + bind_instr(g04_0_list, VMRGLW, VD, VA, VB); + bind_instr(g04_0_list, VMULESB, VD, VA, VB); + bind_instr(g04_0_list, VMULESH, VD, VA, VB); + bind_instr(g04_0_list, VMULEUB, VD, VA, VB); + bind_instr(g04_0_list, VMULEUH, VD, VA, VB); + bind_instr(g04_0_list, VMULOSB, VD, VA, VB); + bind_instr(g04_0_list, VMULOSH, VD, VA, VB); + bind_instr(g04_0_list, VMULOUB, VD, VA, VB); + bind_instr(g04_0_list, VMULOUH, VD, VA, VB); + bind_instr(g04_0_list, VNOR, VD, VA, VB); + bind_instr(g04_0_list, VOR, VD, VA, VB); + bind_instr(g04_0_list, VPKPX, VD, VA, VB); + bind_instr(g04_0_list, VPKSHSS, VD, VA, VB); + bind_instr(g04_0_list, VPKSHUS, VD, VA, VB); + bind_instr(g04_0_list, VPKSWSS, VD, VA, VB); + bind_instr(g04_0_list, VPKSWUS, VD, VA, VB); + bind_instr(g04_0_list, VPKUHUM, VD, VA, VB); + bind_instr(g04_0_list, VPKUHUS, VD, VA, VB); + bind_instr(g04_0_list, VPKUWUM, VD, VA, VB); + bind_instr(g04_0_list, VPKUWUS, VD, VA, VB); + bind_instr(g04_0_list, VREFP, VD, VB); + bind_instr(g04_0_list, VRFIM, VD, VB); + bind_instr(g04_0_list, VRFIN, VD, VB); + bind_instr(g04_0_list, VRFIP, VD, VB); + bind_instr(g04_0_list, VRFIZ, VD, VB); + bind_instr(g04_0_list, VRLB, VD, VA, VB); + bind_instr(g04_0_list, VRLH, VD, VA, VB); + bind_instr(g04_0_list, VRLW, VD, VA, VB); + bind_instr(g04_0_list, VRSQRTEFP, VD, VB); + bind_instr(g04_0_list, VSL, VD, VA, VB); + bind_instr(g04_0_list, VSLB, VD, VA, VB); + bind_instr(g04_0_list, VSLH, VD, VA, VB); + bind_instr(g04_0_list, VSLO, VD, VA, VB); + bind_instr(g04_0_list, VSLW, VD, VA, VB); + bind_instr(g04_0_list, VSPLTB, VD, VUIMM, VB); + bind_instr(g04_0_list, VSPLTH, VD, VUIMM, VB); + bind_instr(g04_0_list, VSPLTISB, VD, VSIMM); + bind_instr(g04_0_list, 
VSPLTISH, VD, VSIMM); + bind_instr(g04_0_list, VSPLTISW, VD, VSIMM); + bind_instr(g04_0_list, VSPLTW, VD, VUIMM, VB); + bind_instr(g04_0_list, VSR, VD, VA, VB); + bind_instr(g04_0_list, VSRAB, VD, VA, VB); + bind_instr(g04_0_list, VSRAH, VD, VA, VB); + bind_instr(g04_0_list, VSRAW, VD, VA, VB); + bind_instr(g04_0_list, VSRB, VD, VA, VB); + bind_instr(g04_0_list, VSRH, VD, VA, VB); + bind_instr(g04_0_list, VSRO, VD, VA, VB); + bind_instr(g04_0_list, VSRW, VD, VA, VB); + bind_instr(g04_0_list, VSUBCUW, VD, VA, VB); + bind_instr(g04_0_list, VSUBFP, VD, VA, VB); + bind_instr(g04_0_list, VSUBSBS, VD, VA, VB); + bind_instr(g04_0_list, VSUBSHS, VD, VA, VB); + bind_instr(g04_0_list, VSUBSWS, VD, VA, VB); + bind_instr(g04_0_list, VSUBUBM, VD, VA, VB); + bind_instr(g04_0_list, VSUBUBS, VD, VA, VB); + bind_instr(g04_0_list, VSUBUHM, VD, VA, VB); + bind_instr(g04_0_list, VSUBUHS, VD, VA, VB); + bind_instr(g04_0_list, VSUBUWM, VD, VA, VB); + bind_instr(g04_0_list, VSUBUWS, VD, VA, VB); + bind_instr(g04_0_list, VSUMSWS, VD, VA, VB); + bind_instr(g04_0_list, VSUM2SWS, VD, VA, VB); + bind_instr(g04_0_list, VSUM4SBS, VD, VA, VB); + bind_instr(g04_0_list, VSUM4SHS, VD, VA, VB); + bind_instr(g04_0_list, VSUM4UBS, VD, VA, VB); + bind_instr(g04_0_list, VUPKHPX, VD, VB); + bind_instr(g04_0_list, VUPKHSB, VD, VB); + bind_instr(g04_0_list, VUPKHSH, VD, VB); + bind_instr(g04_0_list, VUPKLPX, VD, VB); + bind_instr(g04_0_list, VUPKLSB, VD, VB); + bind_instr(g04_0_list, VUPKLSH, VD, VB); + bind_instr(g04_0_list, VXOR, VD, VA, VB); + + bind_instr(g13_list, MCRF, CRFD, CRFS); + bind_instr(g13_list, BCLR, BO, BI, BH, LK); + bind_instr(g13_list, CRNOR, CRBD, CRBA, CRBB); + bind_instr(g13_list, CRANDC, CRBD, CRBA, CRBB); + bind_instr(g13_list, ISYNC); + bind_instr(g13_list, CRXOR, CRBD, CRBA, CRBB); + bind_instr(g13_list, CRNAND, CRBD, CRBA, CRBB); + bind_instr(g13_list, CRAND, CRBD, CRBA, CRBB); + bind_instr(g13_list, CREQV, CRBD, CRBA, CRBB); + bind_instr(g13_list, CRORC, CRBD, CRBA, CRBB); + 
bind_instr(g13_list, CROR, CRBD, CRBA, CRBB); + bind_instr(g13_list, BCCTR, BO, BI, BH, LK); + + bind_instr(g1e_list, RLDICL, RA, RS, sh, mb, RC); + bind_instr(g1e_list, RLDICR, RA, RS, sh, me, RC); + bind_instr(g1e_list, RLDIC, RA, RS, sh, mb, RC); + bind_instr(g1e_list, RLDIMI, RA, RS, sh, mb, RC); + bind_instr(g1e_list, RLDC_LR, RA, RS, RB, mb, AA, RC); + + /*0x000*/bind_instr(g1f_list, CMP, CRFD, L_10, RA, RB); + /*0x004*/bind_instr(g1f_list, TW, TO, RA, RB); + /*0x006*/bind_instr(g1f_list, LVSL, VD, RA, RB); + /*0x007*/bind_instr(g1f_list, LVEBX, VD, RA, RB); + /*0x008*/bind_instr(g1f_list, SUBFC, RD, RA, RB, OE, RC); + /*0x009*/bind_instr(g1f_list, MULHDU, RD, RA, RB, RC); + /*0x00a*/bind_instr(g1f_list, ADDC, RD, RA, RB, OE, RC); + /*0x00b*/bind_instr(g1f_list, MULHWU, RD, RA, RB, RC); + /*0x013*/bind_instr(g1f_list, MFOCRF, L_11, RD, CRM); + /*0x014*/bind_instr(g1f_list, LWARX, RD, RA, RB); + /*0x015*/bind_instr(g1f_list, LDX, RD, RA, RB); + /*0x017*/bind_instr(g1f_list, LWZX, RD, RA, RB); + /*0x018*/bind_instr(g1f_list, SLW, RA, RS, RB, RC); + /*0x01a*/bind_instr(g1f_list, CNTLZW, RA, RS, RC); + /*0x01b*/bind_instr(g1f_list, SLD, RA, RS, RB, RC); + /*0x01c*/bind_instr(g1f_list, AND, RA, RS, RB, RC); + /*0x020*/bind_instr(g1f_list, CMPL, CRFD, L_10, RA, RB); + /*0x026*/bind_instr(g1f_list, LVSR, VD, RA, RB); + /*0x027*/bind_instr(g1f_list, LVEHX, VD, RA, RB); + /*0x028*/bind_instr(g1f_list, SUBF, RD, RA, RB, OE, RC); + /*0x035*/bind_instr(g1f_list, LDUX, RD, RA, RB); + /*0x036*/bind_instr(g1f_list, DCBST, RA, RB); + /*0x037*/bind_instr(g1f_list, LWZUX, RD, RA, RB); + /*0x03a*/bind_instr(g1f_list, CNTLZD, RA, RS, RC); + /*0x03c*/bind_instr(g1f_list, ANDC, RA, RS, RB, RC); + /*0x03c*/bind_instr(g1f_list, TD, TO, RA, RB); + /*0x047*/bind_instr(g1f_list, LVEWX, VD, RA, RB); + /*0x049*/bind_instr(g1f_list, MULHD, RD, RA, RB, RC); + /*0x04b*/bind_instr(g1f_list, MULHW, RD, RA, RB, RC); + /*0x054*/bind_instr(g1f_list, LDARX, RD, RA, RB); + 
/*0x056*/bind_instr(g1f_list, DCBF, RA, RB); + /*0x057*/bind_instr(g1f_list, LBZX, RD, RA, RB); + /*0x067*/bind_instr(g1f_list, LVX, VD, RA, RB); + /*0x068*/bind_instr(g1f_list, NEG, RD, RA, OE, RC); + /*0x077*/bind_instr(g1f_list, LBZUX, RD, RA, RB); + /*0x07c*/bind_instr(g1f_list, NOR, RA, RS, RB, RC); + /*0x087*/bind_instr(g1f_list, STVEBX, VS, RA, RB); + /*0x088*/bind_instr(g1f_list, SUBFE, RD, RA, RB, OE, RC); + /*0x08a*/bind_instr(g1f_list, ADDE, RD, RA, RB, OE, RC); + /*0x090*/bind_instr(g1f_list, MTOCRF, L_11, CRM, RS); + /*0x095*/bind_instr(g1f_list, STDX, RS, RA, RB); + /*0x096*/bind_instr(g1f_list, STWCX_, RS, RA, RB); + /*0x097*/bind_instr(g1f_list, STWX, RS, RA, RB); + /*0x0a7*/bind_instr(g1f_list, STVEHX, VS, RA, RB); + /*0x0b5*/bind_instr(g1f_list, STDUX, RS, RA, RB); + /*0x0b7*/bind_instr(g1f_list, STWUX, RS, RA, RB); + /*0x0c7*/bind_instr(g1f_list, STVEWX, VS, RA, RB); + /*0x0c8*/bind_instr(g1f_list, SUBFZE, RD, RA, OE, RC); + /*0x0ca*/bind_instr(g1f_list, ADDZE, RD, RA, OE, RC); + /*0x0d6*/bind_instr(g1f_list, STDCX_, RS, RA, RB); + /*0x0d7*/bind_instr(g1f_list, STBX, RS, RA, RB); + /*0x0e7*/bind_instr(g1f_list, STVX, VS, RA, RB); + /*0x0e8*/bind_instr(g1f_list, SUBFME, RD, RA, OE, RC); + /*0x0e9*/bind_instr(g1f_list, MULLD, RD, RA, RB, OE, RC); + /*0x0ea*/bind_instr(g1f_list, ADDME, RD, RA, OE, RC); + /*0x0eb*/bind_instr(g1f_list, MULLW, RD, RA, RB, OE, RC); + /*0x0f6*/bind_instr(g1f_list, DCBTST, RA, RB, TH); + /*0x0f7*/bind_instr(g1f_list, STBUX, RS, RA, RB); + /*0x10a*/bind_instr(g1f_list, ADD, RD, RA, RB, OE, RC); + /*0x116*/bind_instr(g1f_list, DCBT, RA, RB, TH); + /*0x117*/bind_instr(g1f_list, LHZX, RD, RA, RB); + /*0x11c*/bind_instr(g1f_list, EQV, RA, RS, RB, RC); + /*0x136*/bind_instr(g1f_list, ECIWX, RD, RA, RB); + /*0x137*/bind_instr(g1f_list, LHZUX, RD, RA, RB); + /*0x13c*/bind_instr(g1f_list, XOR, RA, RS, RB, RC); + /*0x153*/bind_instr(g1f_list, MFSPR, RD, SPR); + /*0x155*/bind_instr(g1f_list, LWAX, RD, RA, RB); + 
/*0x156*/bind_instr(g1f_list, DST, RA, RB, STRM, L_6); + /*0x157*/bind_instr(g1f_list, LHAX, RD, RA, RB); + /*0x167*/bind_instr(g1f_list, LVXL, VD, RA, RB); + /*0x173*/bind_instr(g1f_list, MFTB, RD, SPR); + /*0x175*/bind_instr(g1f_list, LWAUX, RD, RA, RB); + /*0x176*/bind_instr(g1f_list, DSTST, RA, RB, STRM, L_6); + /*0x177*/bind_instr(g1f_list, LHAUX, RD, RA, RB); + /*0x197*/bind_instr(g1f_list, STHX, RS, RA, RB); + /*0x19c*/bind_instr(g1f_list, ORC, RA, RS, RB, RC); + /*0x1b6*/bind_instr(g1f_list, ECOWX, RS, RA, RB); + /*0x1b7*/bind_instr(g1f_list, STHUX, RS, RA, RB); + /*0x1bc*/bind_instr(g1f_list, OR, RA, RS, RB, RC); + /*0x1c9*/bind_instr(g1f_list, DIVDU, RD, RA, RB, OE, RC); + /*0x1cb*/bind_instr(g1f_list, DIVWU, RD, RA, RB, OE, RC); + /*0x1d3*/bind_instr(g1f_list, MTSPR, SPR, RS); + /*0x1d6*///DCBI + /*0x1dc*/bind_instr(g1f_list, NAND, RA, RS, RB, RC); + /*0x1e7*/bind_instr(g1f_list, STVXL, VS, RA, RB); + /*0x1e9*/bind_instr(g1f_list, DIVD, RD, RA, RB, OE, RC); + /*0x1eb*/bind_instr(g1f_list, DIVW, RD, RA, RB, OE, RC); + /*0x207*/bind_instr(g1f_list, LVLX, VD, RA, RB); + /*0x214*/bind_instr(g1f_list, LDBRX, RD, RA, RB); + /*0x215*/bind_instr(g1f_list, LSWX, RD, RA, RB); + /*0x216*/bind_instr(g1f_list, LWBRX, RD, RA, RB); + /*0x217*/bind_instr(g1f_list, LFSX, FRD, RA, RB); + /*0x218*/bind_instr(g1f_list, SRW, RA, RS, RB, RC); + /*0x21b*/bind_instr(g1f_list, SRD, RA, RS, RB, RC); + /*0x227*/bind_instr(g1f_list, LVRX, VD, RA, RB); + /*0x237*/bind_instr(g1f_list, LFSUX, FRD, RA, RB); + /*0x255*/bind_instr(g1f_list, LSWI, RD, RA, NB); + /*0x256*/bind_instr(g1f_list, SYNC, L_9_10); + /*0x257*/bind_instr(g1f_list, LFDX, FRD, RA, RB); + /*0x277*/bind_instr(g1f_list, LFDUX, FRD, RA, RB); + /*0x287*/bind_instr(g1f_list, STVLX, VS, RA, RB); + /*0x296*/bind_instr(g1f_list, STSWX, RS, RA, RB); + /*0x296*/bind_instr(g1f_list, STWBRX, RS, RA, RB); + /*0x297*/bind_instr(g1f_list, STFSX, FRS, RA, RB); + /*0x2a7*/bind_instr(g1f_list, STVRX, VS, RA, RB); + 
/*0x2b7*/bind_instr(g1f_list, STFSUX, FRS, RA, RB); + /*0x2d5*/bind_instr(g1f_list, STSWI, RS, RA, NB); + /*0x2d7*/bind_instr(g1f_list, STFDX, FRS, RA, RB); + /*0x2d7*/bind_instr(g1f_list, STFDUX, FRS, RA, RB); + /*0x307*/bind_instr(g1f_list, LVLXL, VD, RA, RB); + /*0x316*/bind_instr(g1f_list, LHBRX, RD, RA, RB); + /*0x318*/bind_instr(g1f_list, SRAW, RA, RS, RB, RC); + /*0x31a*/bind_instr(g1f_list, SRAD, RA, RS, RB, RC); + /*0x327*/bind_instr(g1f_list, LVRXL, VD, RA, RB); + /*0x336*/bind_instr(g1f_list, DSS, STRM, L_6); + /*0x338*/bind_instr(g1f_list, SRAWI, RA, RS, SH, RC); + /*0x33a*/bind_instr(g1f_list, SRADI1, RA, RS, sh, RC); + /*0x33b*/bind_instr(g1f_list, SRADI2, RA, RS, sh, RC); + /*0x356*/bind_instr(g1f_list, EIEIO); + /*0x387*/bind_instr(g1f_list, STVLXL, VS, RA, RB); + /*0x396*/bind_instr(g1f_list, STHBRX, RS, RA, RB); + /*0x39a*/bind_instr(g1f_list, EXTSH, RA, RS, RC); + /*0x387*/bind_instr(g1f_list, STVRXL, VS, RA, RB); + /*0x3ba*/bind_instr(g1f_list, EXTSB, RA, RS, RC); + /*0x3d7*/bind_instr(g1f_list, STFIWX, FRS, RA, RB); + /*0x3da*/bind_instr(g1f_list, EXTSW, RA, RS, RC); + /*0x3d6*/bind_instr(g1f_list, ICBI, RA, RB); + /*0x3f6*/bind_instr(g1f_list, DCBZ, RA, RB); + + bind_instr(g3a_list, LD, RD, RA, DS); + bind_instr(g3a_list, LDU, RD, RA, DS); + bind_instr(g3a_list, LWA, RD, RA, DS); + + bind_instr(g3b_list, FDIVS, FRD, FRA, FRB, RC); + bind_instr(g3b_list, FSUBS, FRD, FRA, FRB, RC); + bind_instr(g3b_list, FADDS, FRD, FRA, FRB, RC); + bind_instr(g3b_list, FSQRTS, FRD, FRB, RC); + bind_instr(g3b_list, FRES, FRD, FRB, RC); + bind_instr(g3b_list, FMULS, FRD, FRA, FRC, RC); + bind_instr(g3b_list, FMADDS, FRD, FRA, FRC, FRB, RC); + bind_instr(g3b_list, FMSUBS, FRD, FRA, FRC, FRB, RC); + bind_instr(g3b_list, FNMSUBS, FRD, FRA, FRC, FRB, RC); + bind_instr(g3b_list, FNMADDS, FRD, FRA, FRC, FRB, RC); + + bind_instr(g3e_list, STD, RS, RA, DS); + bind_instr(g3e_list, STDU, RS, RA, DS); + + bind_instr(g3f_list, FSEL, FRD, FRA, FRC, FRB, RC); + 
bind_instr(g3f_list, FMUL, FRD, FRA, FRC, RC); + bind_instr(g3f_list, FMSUB, FRD, FRA, FRC, FRB, RC); + bind_instr(g3f_list, FMADD, FRD, FRA, FRC, FRB, RC); + bind_instr(g3f_list, FNMSUB, FRD, FRA, FRC, FRB, RC); + bind_instr(g3f_list, FNMADD, FRD, FRA, FRC, FRB, RC); + + bind_instr(g3f_0_list, FDIV, FRD, FRA, FRB, RC); + bind_instr(g3f_0_list, FSUB, FRD, FRA, FRB, RC); + bind_instr(g3f_0_list, FADD, FRD, FRA, FRB, RC); + bind_instr(g3f_0_list, FSQRT, FRD, FRB, RC); + bind_instr(g3f_0_list, FRSQRTE, FRD, FRB, RC); + bind_instr(g3f_0_list, FCMPU, CRFD, FRA, FRB); + bind_instr(g3f_0_list, FRSP, FRD, FRB, RC); + bind_instr(g3f_0_list, FCTIW, FRD, FRB, RC); + bind_instr(g3f_0_list, FCTIWZ, FRD, FRB, RC); + bind_instr(g3f_0_list, FCMPO, CRFD, FRA, FRB); + bind_instr(g3f_0_list, FNEG, FRD, FRB, RC); + bind_instr(g3f_0_list, FMR, FRD, FRB, RC); + bind_instr(g3f_0_list, FNABS, FRD, FRB, RC); + bind_instr(g3f_0_list, FABS, FRD, FRB, RC); + bind_instr(g3f_0_list, FCFID, FRD, FRB, RC); + bind_instr(g3f_0_list, FCTID, FRD, FRB, RC); + bind_instr(g3f_0_list, FCTIDZ, FRD, FRB, RC); + + bind_instr(g3f_0_list, MTFSB1, CRBD, RC); + bind_instr(g3f_0_list, MCRFS, CRFD, CRFS); + bind_instr(g3f_0_list, MTFSB0, CRBD, RC); + bind_instr(g3f_0_list, MTFSFI, CRFD, I, RC); + bind_instr(g3f_0_list, MFFS, FRD, RC); + bind_instr(g3f_0_list, MTFSF, FM, FRB, RC); + + enum + { + r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, + r12, r13, r14, r15, r16, r17, r18, r19, r20, r21, + r22, r23, r24, r25, r26, r27, r28, r29, r30, r31 + }; + } + + namespace implicts + { + using namespace lists; + + static auto LIS = std::bind(ADDIS, std::placeholders::_1, r0, std::placeholders::_2); + static auto LI = std::bind(ADDI, std::placeholders::_1, r0, std::placeholders::_2); + static auto NOP = std::bind(ORI, r0, r0, 0); + static auto MR = std::bind(OR, std::placeholders::_1, std::placeholders::_2, std::placeholders::_2, false); + static auto BLR = std::bind(BCLR, 0x10 | 0x04, 0, 0, 0); + static auto BCTR = 
std::bind(BCCTR, 0x10 | 0x04, 0, 0, 0); + static auto BCTRL = std::bind(BCCTR, 0x10 | 0x04, 0, 0, 1); + static auto MTCTR = std::bind(MTSPR, (0x1 << 5) | 0x8, std::placeholders::_1); + } + + + using namespace lists; + using namespace implicts; #undef bind_instr -}; +}; \ No newline at end of file diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 104418d7bd..90dc8ccbf5 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -146,8 +146,25 @@ private: case 0x001: return CPU.XER.XER; case 0x008: return CPU.LR; case 0x009: return CPU.CTR; - case 0x100: return CPU.USPRG0; + case 0x100: + case 0x101: + case 0x102: + case 0x103: + case 0x104: + case 0x105: + case 0x106: + case 0x107: return CPU.USPRG[n - 0x100]; + case 0x10C: return get_time(); + + case 0x110: + case 0x111: + case 0x112: + case 0x113: + case 0x114: + case 0x115: + case 0x116: + case 0x117: return CPU.SPRG[n - 0x110]; } UNK(fmt::Format("ReadSPR error: Unknown SPR 0x%x!", n)); @@ -163,8 +180,25 @@ private: case 0x001: CPU.XER.XER = value; return; case 0x008: CPU.LR = value; return; case 0x009: CPU.CTR = value; return; - case 0x100: CPU.USPRG0 = value; return; + case 0x100: + case 0x101: + case 0x102: + case 0x103: + case 0x104: + case 0x105: + case 0x106: + case 0x107: CPU.USPRG[n - 0x100] = value; break; + case 0x10C: UNK("WriteSPR: Write to time-based SPR. 
Report this to a developer!"); return; + + case 0x110: + case 0x111: + case 0x112: + case 0x113: + case 0x114: + case 0x115: + case 0x116: + case 0x117: CPU.SPRG[n - 0x110] = value; break; } UNK(fmt::Format("WriteSPR error: Unknown SPR 0x%x!", n)); diff --git a/rpcs3/Emu/Cell/PPUProgramCompiler.cpp b/rpcs3/Emu/Cell/PPUProgramCompiler.cpp index 9c3b760ea7..532b26d36a 100644 --- a/rpcs3/Emu/Cell/PPUProgramCompiler.cpp +++ b/rpcs3/Emu/Cell/PPUProgramCompiler.cpp @@ -1,7 +1,7 @@ #include "stdafx_gui.h" #include "PPUProgramCompiler.h" #include "Utilities/rFile.h" - +/* using namespace PPU_instr; template @@ -78,10 +78,10 @@ SectionInfo::SectionInfo(const std::string& _name) section_name_offs += name.length() + 1; } -void SectionInfo::SetDataSize(u32 size, u32 align) +void SectionInfo::SetDataSize(u32 size, u32 addralign) { - if(align) shdr.sh_addralign = align; - if(shdr.sh_addralign) size = AlignAddr(size, shdr.sh_addralign); + if (addralign) shdr.sh_addralign = addralign; + if (shdr.sh_addralign) size = align(size, shdr.sh_addralign); if(!code.empty()) { @@ -985,7 +985,7 @@ void CompilePPUProgram::Compile() elf_info.e_shnum = 15; elf_info.e_shstrndx = elf_info.e_shnum - 1; elf_info.e_phoff = elf_info.e_ehsize; - u32 section_offset = AlignAddr(elf_info.e_phoff + elf_info.e_phnum * elf_info.e_phentsize, 0x100); + u32 section_offset = align(elf_info.e_phoff + elf_info.e_phnum * elf_info.e_phentsize, 0x100); static const u32 sceStub_text_block = 8 * 4; @@ -1143,7 +1143,7 @@ void CompilePPUProgram::Compile() Elf64_Shdr s_sceStub_text; memset(&s_sceStub_text, 0, sizeof(Elf64_Shdr)); s_sceStub_text.sh_addralign = 4; - section_offset = AlignAddr(section_offset, s_sceStub_text.sh_addralign); + section_offset = align(section_offset, s_sceStub_text.sh_addralign); s_sceStub_text.sh_type = 1; s_sceStub_text.sh_offset = section_offset; s_sceStub_text.sh_addr = section_offset + 0x10000; @@ -1167,7 +1167,7 @@ void CompilePPUProgram::Compile() Elf64_Shdr s_lib_stub_top; 
memset(&s_lib_stub_top, 0, sizeof(Elf64_Shdr)); s_lib_stub_top.sh_addralign = 4; - section_offset = AlignAddr(section_offset, s_lib_stub_top.sh_addralign); + section_offset = align(section_offset, s_lib_stub_top.sh_addralign); s_lib_stub_top.sh_type = 1; s_lib_stub_top.sh_name = section_name_offset; s_lib_stub_top.sh_offset = section_offset; @@ -1186,7 +1186,7 @@ void CompilePPUProgram::Compile() s_lib_stub.sh_offset = section_offset; s_lib_stub.sh_addr = section_offset + 0x10000; s_lib_stub.sh_flags = 2; - s_lib_stub.sh_size = sizeof(Elf64_StubHeader) * modules.size(); + s_lib_stub.sh_size = sizeof(sys_stub) * modules.size(); sections_names.push_back(".lib.stub"); section_name_offset += std::string(".lib.stub").length() + 1; section_offset += s_lib_stub.sh_size; @@ -1207,7 +1207,7 @@ void CompilePPUProgram::Compile() Elf64_Shdr s_rodata_sceFNID; memset(&s_rodata_sceFNID, 0, sizeof(Elf64_Shdr)); s_rodata_sceFNID.sh_addralign = 4; - section_offset = AlignAddr(section_offset, s_rodata_sceFNID.sh_addralign); + section_offset = align(section_offset, s_rodata_sceFNID.sh_addralign); s_rodata_sceFNID.sh_type = 1; s_rodata_sceFNID.sh_name = section_name_offset; s_rodata_sceFNID.sh_offset = section_offset; @@ -1221,7 +1221,7 @@ void CompilePPUProgram::Compile() Elf64_Shdr s_rodata_sceResident; memset(&s_rodata_sceResident, 0, sizeof(Elf64_Shdr)); s_rodata_sceResident.sh_addralign = 4; - section_offset = AlignAddr(section_offset, s_rodata_sceResident.sh_addralign); + section_offset = align(section_offset, s_rodata_sceResident.sh_addralign); s_rodata_sceResident.sh_type = 1; s_rodata_sceResident.sh_name = section_name_offset; s_rodata_sceResident.sh_offset = section_offset; @@ -1232,7 +1232,7 @@ void CompilePPUProgram::Compile() { s_rodata_sceResident.sh_size += module.m_name.length() + 1; } - s_rodata_sceResident.sh_size = AlignAddr(s_rodata_sceResident.sh_size, s_rodata_sceResident.sh_addralign); + s_rodata_sceResident.sh_size = align(s_rodata_sceResident.sh_size, 
s_rodata_sceResident.sh_addralign); sections_names.push_back(".rodata.sceResident"); section_name_offset += std::string(".rodata.sceResident").length() + 1; section_offset += s_rodata_sceResident.sh_size; @@ -1240,7 +1240,7 @@ void CompilePPUProgram::Compile() Elf64_Shdr s_lib_ent_top; memset(&s_lib_ent_top, 0, sizeof(Elf64_Shdr)); s_lib_ent_top.sh_addralign = 4; - section_offset = AlignAddr(section_offset, s_lib_ent_top.sh_addralign); + section_offset = align(section_offset, s_lib_ent_top.sh_addralign); s_lib_ent_top.sh_size = 4; s_lib_ent_top.sh_flags = 2; s_lib_ent_top.sh_type = 1; @@ -1267,7 +1267,7 @@ void CompilePPUProgram::Compile() Elf64_Shdr s_sys_proc_prx_param; memset(&s_sys_proc_prx_param, 0, sizeof(Elf64_Shdr)); s_sys_proc_prx_param.sh_addralign = 4; - section_offset = AlignAddr(section_offset, s_sys_proc_prx_param.sh_addralign); + section_offset = align(section_offset, s_sys_proc_prx_param.sh_addralign); s_sys_proc_prx_param.sh_type = 1; s_sys_proc_prx_param.sh_size = sizeof(sys_proc_prx_param); s_sys_proc_prx_param.sh_name = section_name_offset; @@ -1280,14 +1280,14 @@ void CompilePPUProgram::Compile() const u32 prog_load_0_end = section_offset; - section_offset = AlignAddr(section_offset + 0x10000, 0x10000); + section_offset = align(section_offset + 0x10000, 0x10000); const u32 prog_load_1_start = section_offset; Elf64_Shdr s_data_sceFStub; memset(&s_data_sceFStub, 0, sizeof(Elf64_Shdr)); s_data_sceFStub.sh_name = section_name_offset; s_data_sceFStub.sh_addralign = 4; - section_offset = AlignAddr(section_offset, s_data_sceFStub.sh_addralign); + section_offset = align(section_offset, s_data_sceFStub.sh_addralign); s_data_sceFStub.sh_flags = 3; s_data_sceFStub.sh_type = 1; s_data_sceFStub.sh_offset = section_offset; @@ -1300,7 +1300,7 @@ void CompilePPUProgram::Compile() Elf64_Shdr s_tbss; memset(&s_tbss, 0, sizeof(Elf64_Shdr)); s_tbss.sh_addralign = 4; - section_offset = AlignAddr(section_offset, s_tbss.sh_addralign); + section_offset = 
align(section_offset, s_tbss.sh_addralign); s_tbss.sh_size = 4; s_tbss.sh_flags = 0x403; s_tbss.sh_type = 8; @@ -1314,7 +1314,7 @@ void CompilePPUProgram::Compile() Elf64_Shdr s_opd; memset(&s_opd, 0, sizeof(Elf64_Shdr)); s_opd.sh_addralign = 8; - section_offset = AlignAddr(section_offset, s_opd.sh_addralign); + section_offset = align(section_offset, s_opd.sh_addralign); s_opd.sh_size = 2*4; s_opd.sh_type = 1; s_opd.sh_offset = section_offset; @@ -1475,7 +1475,7 @@ void CompilePPUProgram::Compile() if(!m_file_path.empty() && !m_analyze && !m_error) { - s_opd.sh_size = AlignAddr(s_opd.sh_size, s_opd.sh_addralign); + s_opd.sh_size = align(s_opd.sh_size, s_opd.sh_addralign); section_offset += s_opd.sh_size; const u32 prog_load_1_end = section_offset; @@ -1483,7 +1483,7 @@ void CompilePPUProgram::Compile() Elf64_Shdr s_shstrtab; memset(&s_shstrtab, 0, sizeof(Elf64_Shdr)); s_shstrtab.sh_addralign = 1; - section_offset = AlignAddr(section_offset, s_shstrtab.sh_addralign); + section_offset = align(section_offset, s_shstrtab.sh_addralign); s_shstrtab.sh_name = section_name_offset; s_shstrtab.sh_type = 3; s_shstrtab.sh_offset = section_offset; @@ -1505,7 +1505,7 @@ void CompilePPUProgram::Compile() elf_info.e_machine = MACHINE_PPC64; //PowerPC64 elf_info.e_version = 1; //ver 1 elf_info.e_flags = 0x0; - elf_info.e_shoff = AlignAddr(section_offset, 4); + elf_info.e_shoff = align(section_offset, 4); u8* opd_data = new u8[s_opd.sh_size]; u32 entry_point = s_text.sh_addr; @@ -1523,14 +1523,14 @@ void CompilePPUProgram::Compile() sys_proc_prx_param prx_param; memset(&prx_param, 0, sizeof(sys_proc_prx_param)); - prx_param.size = re32(0x40); - prx_param.magic = re32(0x1b434cec); - prx_param.version = re32(0x4); - prx_param.libentstart = re32(s_lib_ent_top.sh_addr + s_lib_ent_top.sh_size); - prx_param.libentend = re32(s_lib_ent_btm.sh_addr); - prx_param.libstubstart = re32(s_lib_stub_top.sh_addr + s_lib_stub_top.sh_size); - prx_param.libstubend = re32(s_lib_stub_btm.sh_addr); - 
prx_param.ver = re16(0x101); + prx_param.size = 0x40; + prx_param.magic = 0x1b434cec; + prx_param.version = 0x4; + prx_param.libentstart = s_lib_ent_top.sh_addr + s_lib_ent_top.sh_size; + prx_param.libentend = s_lib_ent_btm.sh_addr; + prx_param.libstubstart = vm::bptr::make(s_lib_stub_top.sh_addr + s_lib_stub_top.sh_size); + prx_param.libstubend = vm::bptr::make(s_lib_stub_btm.sh_addr); + prx_param.ver = 0x101; elf_info.e_entry = s_opd.sh_addr; @@ -1588,20 +1588,20 @@ void CompilePPUProgram::Compile() f.Seek(s_lib_stub.sh_offset); for(u32 i=0, nameoffs=4, dataoffs=0; i::make(s_rodata_sceResident.sh_addr + nameoffs); + stub.s_nid = vm::bptr::make(s_rodata_sceFNID.sh_addr + dataoffs); + stub.s_text = vm::bptr::make(s_data_sceFStub.sh_addr + dataoffs); + stub.s_imports = modules[i].m_imports.size(); dataoffs += modules[i].m_imports.size() * 4; - f.Write(&stub, sizeof(Elf64_StubHeader)); + f.Write(&stub, sizeof(sys_stub)); nameoffs += modules[i].m_name.length() + 1; } @@ -1732,3 +1732,4 @@ void CompilePPUProgram::Compile() system("make_fself.cmd"); } } +*/ \ No newline at end of file diff --git a/rpcs3/Emu/Cell/PPUProgramCompiler.h b/rpcs3/Emu/Cell/PPUProgramCompiler.h index 608fbc4c1e..5070c34516 100644 --- a/rpcs3/Emu/Cell/PPUProgramCompiler.h +++ b/rpcs3/Emu/Cell/PPUProgramCompiler.h @@ -1,7 +1,7 @@ #pragma once #include "PPUInstrTable.h" #include "Loader/ELF64.h" - +/* enum ArgType { ARG_ERR = 0, @@ -189,3 +189,4 @@ protected: public: void Compile(); }; +*/ \ No newline at end of file diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index a950332fc7..e8189828a6 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -10,6 +10,8 @@ #include "Emu/Cell/PPUDecoder.h" #include "Emu/Cell/PPUInterpreter.h" #include "Emu/Cell/PPULLVMRecompiler.h" +//#include "Emu/Cell/PPURecompiler.h" +#include "Emu/CPU/CPUThreadManager.h" PPUThread& GetCurrentPPUThread() { @@ -39,11 +41,11 @@ void PPUThread::DoReset() memset(FPR, 0, 
sizeof(FPR)); memset(GPR, 0, sizeof(GPR)); memset(SPRG, 0, sizeof(SPRG)); + memset(USPRG, 0, sizeof(USPRG)); CR.CR = 0; LR = 0; CTR = 0; - USPRG0 = 0; TB = 0; XER.XER = 0; FPSCR.FPSCR = 0; @@ -84,8 +86,10 @@ void PPUThread::InitRegs() } */ - GPR[1] = AlignAddr(m_stack_addr + m_stack_size, 0x200) - 0x200; + GPR[1] = align(m_stack_addr + m_stack_size, 0x200) - 0x200; GPR[2] = rtoc; + GPR[11] = entry; + GPR[12] = Emu.GetMallocPageSize(); GPR[13] = Memory.PRXMem.GetStartAddr() + 0x7060; LR = Emu.GetPPUThreadExit(); @@ -122,6 +126,8 @@ void PPUThread::DoRun() #endif break; + //case 3: m_dec = new PPURecompiler(*this); break; + default: LOG_ERROR(PPU, "Invalid CPU decoder mode: %d", Ini.CPUDecoderMode.GetValue()); Emu.Pause(); @@ -218,12 +224,24 @@ void PPUThread::FastStop() void PPUThread::Task() { - if (m_custom_task) + if (custom_task) { - m_custom_task(*this); + custom_task(*this); } else { CPUThread::Task(); } } + +ppu_thread::ppu_thread(u32 entry, const std::string& name, u32 stack_size, u32 prio) +{ + thread = &Emu.GetCPU().AddThread(CPU_THREAD_PPU); + + thread->SetName(name); + thread->SetEntry(entry); + thread->SetStackSize(stack_size ? stack_size : Emu.GetInfo().GetProcParam().primary_stacksize); + thread->SetPrio(prio ? 
prio : Emu.GetInfo().GetProcParam().primary_prio); + + argc = 0; +} \ No newline at end of file diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index 1f408210b9..e7f50f81e6 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -1,5 +1,6 @@ #pragma once #include "Emu/Cell/PPCThread.h" +#include "Emu/Memory/vm.h" enum { @@ -530,11 +531,8 @@ public: u64 LR; //SPR 0x008 : Link Register u64 CTR; //SPR 0x009 : Count Register - union - { - u64 USPRG0; //SPR 0x100 : User-SPR General-Purpose Register 0 - u64 SPRG[8]; //SPR 0x100 - 0x107 : SPR General-Purpose Registers - }; + u64 USPRG[8]; //SPR 0x100 - 0x107: User-SPR General-Purpose Registers + u64 SPRG[8]; //SPR 0x110 - 0x117 : SPR General-Purpose Registers //TBR : Time-Base Registers union @@ -554,7 +552,7 @@ public: u64 R_VALUE; // reservation value (BE) u32 owned_mutexes; - std::function m_custom_task; + std::function custom_task; public: PPUThread(); @@ -799,3 +797,49 @@ protected: }; PPUThread& GetCurrentPPUThread(); + +class ppu_thread : cpu_thread +{ + static const u32 stack_align = 0x10; + vm::ptr argv; + u32 argc; + vm::ptr envp; + +public: + ppu_thread(u32 entry, const std::string& name = "", u32 stack_size = 0, u32 prio = 0); + + cpu_thread& args(std::initializer_list values) override + { + if (!values.size()) + return *this; + + assert(argc == 0); + + envp.set(vm::alloc(align((u32)sizeof(*envp), stack_align), vm::main)); + *envp = 0; + argv.set(vm::alloc(sizeof(*argv) * values.size(), vm::main)); + + for (auto &arg : values) + { + u32 arg_size = align(u32(arg.size() + 1), stack_align); + u32 arg_addr = vm::alloc(arg_size, vm::main); + + std::strcpy(vm::get_ptr(arg_addr), arg.c_str()); + + argv[argc++] = arg_addr; + } + + return *this; + } + + cpu_thread& run() override + { + thread->Run(); + + static_cast(thread)->GPR[3] = argc; + static_cast(thread)->GPR[4] = argv.addr(); + static_cast(thread)->GPR[5] = envp.addr(); + + return *this; + } +}; \ No newline at end of 
file diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index bf8d55b984..719cdc8cab 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -1163,7 +1163,6 @@ void SPUThread::StopAndSignal(u32 code) } default: - { if (!SPU.Out_MBox.GetCount()) { LOG_ERROR(Log::SPU, "Unknown STOP code: 0x%x (no message)", code); @@ -1175,5 +1174,16 @@ void SPUThread::StopAndSignal(u32 code) Emu.Pause(); break; } - } } + +spu_thread::spu_thread(u32 entry, const std::string& name, u32 stack_size, u32 prio) +{ + thread = &Emu.GetCPU().AddThread(CPU_THREAD_SPU); + + thread->SetName(name); + thread->SetEntry(entry); + thread->SetStackSize(stack_size ? stack_size : Emu.GetInfo().GetProcParam().primary_stacksize); + thread->SetPrio(prio ? prio : Emu.GetInfo().GetProcParam().primary_prio); + + argc = 0; +} \ No newline at end of file diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index 5553d2fc28..aaffd7d00d 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -583,3 +583,49 @@ protected: }; SPUThread& GetCurrentSPUThread(); + +class spu_thread : cpu_thread +{ + static const u32 stack_align = 0x10; + vm::ptr argv; + u32 argc; + vm::ptr envp; + +public: + spu_thread(u32 entry, const std::string& name = "", u32 stack_size = 0, u32 prio = 0); + + cpu_thread& args(std::initializer_list values) override + { + if (!values.size()) + return *this; + + assert(argc == 0); + + envp.set(Memory.MainMem.AllocAlign((u32)sizeof(envp), stack_align)); + *envp = 0; + argv.set(Memory.MainMem.AllocAlign(u32(sizeof(argv)* values.size()), stack_align)); + + for (auto &arg : values) + { + u32 arg_size = align(u32(arg.size() + 1), stack_align); + u32 arg_addr = Memory.MainMem.AllocAlign(arg_size, stack_align); + + std::strcpy(vm::get_ptr(arg_addr), arg.c_str()); + + argv[argc++] = arg_addr; + } + + return *this; + } + + cpu_thread& run() override + { + thread->Run(); + + static_cast(thread)->GPR[3].from64(argc); + 
static_cast(thread)->GPR[4].from64(argv.addr()); + static_cast(thread)->GPR[5].from64(envp.addr()); + + return *this; + } +}; \ No newline at end of file diff --git a/rpcs3/Emu/Memory/Memory.cpp b/rpcs3/Emu/Memory/Memory.cpp index 59c6c5feb3..ddb837d725 100644 --- a/rpcs3/Emu/Memory/Memory.cpp +++ b/rpcs3/Emu/Memory/Memory.cpp @@ -108,9 +108,9 @@ void MemoryBase::Init(MemoryType type) memset(RawSPUMem, 0, sizeof(RawSPUMem)); #ifdef _WIN32 - if (!g_base_addr) + if (!vm::g_base_addr) #else - if ((s64)g_base_addr == (s64)-1) + if ((s64)vm::g_base_addr == (s64)-1) #endif { LOG_ERROR(MEMORY, "Initializing memory failed"); @@ -119,7 +119,7 @@ void MemoryBase::Init(MemoryType type) } else { - LOG_NOTICE(MEMORY, "Initializing memory: m_base_addr = 0x%llx", (u64)g_base_addr); + LOG_NOTICE(MEMORY, "Initializing memory: base_addr = 0x%llx", (u64)vm::g_base_addr); } switch (type) @@ -128,6 +128,8 @@ void MemoryBase::Init(MemoryType type) MemoryBlocks.push_back(MainMem.SetRange(0x00010000, 0x2FFF0000)); MemoryBlocks.push_back(UserMemory = PRXMem.SetRange(0x30000000, 0x10000000)); MemoryBlocks.push_back(RSXCMDMem.SetRange(0x40000000, 0x10000000)); + MemoryBlocks.push_back(SPRXMem.SetRange(0x50000000, 0x10000000)); + MemoryBlocks.push_back(MmaperMem.SetRange(0xB0000000, 0x10000000)); MemoryBlocks.push_back(RSXFBMem.SetRange(0xC0000000, 0x10000000)); MemoryBlocks.push_back(StackMem.SetRange(0xD0000000, 0x10000000)); break; @@ -217,6 +219,7 @@ bool MemoryBase::Map(const u64 addr, const u32 size) } MemoryBlocks.push_back((new MemoryBlock())->SetRange(addr, size)); + LOG_WARNING(MEMORY, "Memory mapped at 0x%llx: size=0x%x", addr, size); return true; } @@ -412,12 +415,6 @@ void DynamicMemoryBlockBase::Delete() bool DynamicMemoryBlockBase::AllocFixed(u64 addr, u32 size) { - if (!MemoryBlock::GetStartAddr()) - { - LOG_ERROR(MEMORY, "DynamicMemoryBlockBase::AllocFixed(addr=0x%llx, size=0x%x): memory block not initialized", addr, size); - return false; - } - size = PAGE_4K(size + (addr & 
4095)); // align size addr &= ~4095; // align start address @@ -452,7 +449,7 @@ u64 DynamicMemoryBlockBase::AllocAlign(u32 size, u32 align) LOG_ERROR(MEMORY, "DynamicMemoryBlockBase::AllocAlign(size=0x%x, align=0x%x): memory block not initialized", size, align); return 0; } - + size = PAGE_4K(size); u32 exsize; @@ -579,9 +576,9 @@ bool VirtualMemoryBlock::IsInMyRange(const u64 addr, const u32 size) bool VirtualMemoryBlock::IsMyAddress(const u64 addr) { - for(u32 i=0; i= m_mapped_memory[i].addr && addr < m_mapped_memory[i].addr + m_mapped_memory[i].size) + if (addr >= m_mapped_memory[i].addr && addr < m_mapped_memory[i].addr + m_mapped_memory[i].size) { return true; } @@ -620,7 +617,7 @@ u64 VirtualMemoryBlock::Map(u64 realaddr, u32 size) bool VirtualMemoryBlock::Map(u64 realaddr, u32 size, u64 addr) { - if(!IsInMyRange(addr, size) && (IsMyAddress(addr) || IsMyAddress(addr + size - 1))) + if (!IsInMyRange(addr, size) && (IsMyAddress(addr) || IsMyAddress(addr + size - 1))) return false; m_mapped_memory.emplace_back(addr, realaddr, size); @@ -629,9 +626,9 @@ bool VirtualMemoryBlock::Map(u64 realaddr, u32 size, u64 addr) bool VirtualMemoryBlock::UnmapRealAddress(u64 realaddr, u32& size) { - for(u32 i=0; i= m_mapped_memory[i].addr && addr < m_mapped_memory[i].addr + m_mapped_memory[i].size) + if (addr >= m_mapped_memory[i].addr && addr < m_mapped_memory[i].addr + m_mapped_memory[i].size) { result = m_mapped_memory[i].realAddress + (addr - m_mapped_memory[i].addr); return true; @@ -691,9 +688,9 @@ bool VirtualMemoryBlock::getRealAddr(u64 addr, u64& result) u64 VirtualMemoryBlock::getMappedAddress(u64 realAddress) { - for(u32 i=0; i= m_mapped_memory[i].realAddress && realAddress < m_mapped_memory[i].realAddress + m_mapped_memory[i].size) + if (realAddress >= m_mapped_memory[i].realAddress && realAddress < m_mapped_memory[i].realAddress + m_mapped_memory[i].size) { return m_mapped_memory[i].addr + (realAddress - m_mapped_memory[i].realAddress); } @@ -711,7 +708,7 @@ void 
VirtualMemoryBlock::Delete() bool VirtualMemoryBlock::Reserve(u32 size) { - if(size + GetReservedAmount() > GetEndAddr() - GetStartAddr()) + if (size + GetReservedAmount() > GetEndAddr() - GetStartAddr()) return false; m_reserve_size += size; @@ -720,7 +717,7 @@ bool VirtualMemoryBlock::Reserve(u32 size) bool VirtualMemoryBlock::Unreserve(u32 size) { - if(size > GetReservedAmount()) + if (size > GetReservedAmount()) return false; m_reserve_size -= size; @@ -730,4 +727,4 @@ bool VirtualMemoryBlock::Unreserve(u32 size) u32 VirtualMemoryBlock::GetReservedAmount() { return m_reserve_size; -} +} \ No newline at end of file diff --git a/rpcs3/Emu/Memory/Memory.h b/rpcs3/Emu/Memory/Memory.h index 1ec1201b3c..1fccab32b8 100644 --- a/rpcs3/Emu/Memory/Memory.h +++ b/rpcs3/Emu/Memory/Memory.h @@ -7,8 +7,6 @@ using std::nullptr_t; #define safe_delete(x) do {delete (x);(x)=nullptr;} while(0) #define safe_free(x) do {free(x);(x)=nullptr;} while(0) -extern void* const g_base_addr; - enum MemoryType { Memory_PS3, @@ -24,6 +22,11 @@ enum : u32 RAW_SPU_PROB_OFFSET = 0x00040000, }; +namespace vm +{ + extern void* const g_base_addr; +} + class MemoryBase { std::vector MemoryBlocks; @@ -33,8 +36,10 @@ public: MemoryBlock* UserMemory; DynamicMemoryBlock MainMem; + DynamicMemoryBlock SPRXMem; DynamicMemoryBlock PRXMem; DynamicMemoryBlock RSXCMDMem; + DynamicMemoryBlock MmaperMem; DynamicMemoryBlock RSXFBMem; DynamicMemoryBlock StackMem; MemoryBlock* RawSPUMem[(0x100000000 - RAW_SPU_BASE_ADDR) / RAW_SPU_OFFSET]; @@ -69,7 +74,7 @@ public: static void* const GetBaseAddr() { - return g_base_addr; + return vm::g_base_addr; } __noinline void InvalidAddress(const char* func, const u64 addr); diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp index 91708cf50c..95e879e6bb 100644 --- a/rpcs3/Emu/Memory/vm.cpp +++ b/rpcs3/Emu/Memory/vm.cpp @@ -1,23 +1,22 @@ #include "stdafx.h" #include "Memory.h" -#ifdef _WIN32 -#include - -void* const g_base_addr = VirtualAlloc(nullptr, 0x100000000, 
MEM_RESERVE, PAGE_NOACCESS); -#else -#include - -/* OS X uses MAP_ANON instead of MAP_ANONYMOUS */ -#ifndef MAP_ANONYMOUS -#define MAP_ANONYMOUS MAP_ANON -#endif - -void* const g_base_addr = ::mmap(nullptr, 0x100000000, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); -#endif - namespace vm { + #ifdef _WIN32 + #include + void* const g_base_addr = VirtualAlloc(nullptr, 0x100000000, MEM_RESERVE, PAGE_NOACCESS); + #else + #include + + /* OS X uses MAP_ANON instead of MAP_ANONYMOUS */ + #ifndef MAP_ANONYMOUS + #define MAP_ANONYMOUS MAP_ANON + #endif + + void* const g_base_addr = ::mmap(nullptr, 0x100000000, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + #endif + bool check_addr(u32 addr) { // Checking address before using it is unsafe. @@ -28,20 +27,117 @@ namespace vm //TODO bool map(u32 addr, u32 size, u32 flags) { - return false; + return Memory.Map(addr, size); } bool unmap(u32 addr, u32 size, u32 flags) { - return false; + return Memory.Unmap(addr); } - u32 alloc(u32 size) + u32 alloc(u32 addr, u32 size, memory_location location) { - return 0; + return g_locations[location].fixed_allocator(addr, size); } - void unalloc(u32 addr) + u32 alloc(u32 size, memory_location location) { + return g_locations[location].allocator(size); + } + + void dealloc(u32 addr, memory_location location) + { + return g_locations[location].deallocator(addr); + } + + namespace ps3 + { + u32 main_alloc(u32 size) + { + return Memory.MainMem.AllocAlign(size, 1); + } + u32 main_fixed_alloc(u32 addr, u32 size) + { + return Memory.MainMem.AllocFixed(addr, size) ? addr : 0; + } + void main_dealloc(u32 addr) + { + Memory.MainMem.Free(addr); + } + + u32 g_stack_offset = 0; + + u32 stack_alloc(u32 size) + { + return Memory.StackMem.AllocAlign(size, 0x10); + } + u32 stack_fixed_alloc(u32 addr, u32 size) + { + return Memory.StackMem.AllocFixed(addr, size) ? 
addr : 0; + } + void stack_dealloc(u32 addr) + { + Memory.StackMem.Free(addr); + } + + u32 sprx_alloc(u32 size) + { + return Memory.SPRXMem.AllocAlign(size, 1); + } + u32 sprx_fixed_alloc(u32 addr, u32 size) + { + return Memory.SPRXMem.AllocFixed(Memory.SPRXMem.GetStartAddr() + addr, size) ? Memory.SPRXMem.GetStartAddr() + addr : 0; + } + void sprx_dealloc(u32 addr) + { + Memory.SPRXMem.Free(addr); + } + + u32 user_space_alloc(u32 size) + { + return Memory.PRXMem.AllocAlign(size, 1); + } + u32 user_space_fixed_alloc(u32 addr, u32 size) + { + return Memory.PRXMem.AllocFixed(addr, size) ? addr : 0; + } + void user_space_dealloc(u32 addr) + { + Memory.PRXMem.Free(addr); + } + + void init() + { + Memory.Init(Memory_PS3); + } + } + + namespace psv + { + void init() + { + Memory.Init(Memory_PSV); + } + } + + namespace psp + { + void init() + { + Memory.Init(Memory_PSP); + } + } + + location_info g_locations[memory_location_count] = + { + { 0x00010000, 0x2FFF0000, ps3::main_alloc, ps3::main_fixed_alloc, ps3::main_dealloc }, + { 0x00010000, 0x2FFF0000, ps3::stack_alloc, ps3::stack_fixed_alloc, ps3::stack_dealloc }, + { 0x00010000, 0x2FFF0000, ps3::sprx_alloc, ps3::sprx_fixed_alloc, ps3::sprx_dealloc }, + { 0x00010000, 0x2FFF0000, ps3::user_space_alloc, ps3::user_space_fixed_alloc, ps3::user_space_dealloc }, + }; + + void close() + { + Memory.Close(); } } \ No newline at end of file diff --git a/rpcs3/Emu/Memory/vm.h b/rpcs3/Emu/Memory/vm.h index 44378084be..75a9052412 100644 --- a/rpcs3/Emu/Memory/vm.h +++ b/rpcs3/Emu/Memory/vm.h @@ -1,17 +1,53 @@ #pragma once +#include "Memory.h" namespace vm { + enum memory_location + { + main, + stack, + sprx, + user_space, + + memory_location_count + }; + + struct location_info + { + u32 addr_offset; + u32 size; + + u32(*allocator)(u32 size); + u32(*fixed_allocator)(u32 addr, u32 size); + void(*deallocator)(u32 addr); + + u32 alloc_offset; + }; + + extern location_info g_locations[memory_location_count]; + + static void 
set_stack_size(u32 size) {} + static void initialize_stack() {} + + extern void* const g_base_addr; bool map(u32 addr, u32 size, u32 flags); bool unmap(u32 addr, u32 size = 0, u32 flags = 0); - u32 alloc(u32 size); - void unalloc(u32 addr); + u32 alloc(u32 size, memory_location location = user_space); + u32 alloc(u32 addr, u32 size, memory_location location = user_space); + void dealloc(u32 addr, memory_location location = user_space); - template + template T* const get_ptr(u32 addr) { return (T*)((u8*)g_base_addr + addr); } + + template + T* const get_ptr(u64 addr) + { + return get_ptr((u32)addr); + } template T& get_ref(u32 addr) @@ -19,8 +55,16 @@ namespace vm return *get_ptr(addr); } + template + T& get_ref(u64 addr) + { + return get_ref((u32)addr); + } + namespace ps3 { + void init(); + static u8 read8(u32 addr) { return *((u8*)g_base_addr + addr); @@ -36,9 +80,9 @@ namespace vm return re16(*(u16*)((u8*)g_base_addr + addr)); } - static void write16(u32 addr, u16 value) + static void write16(u32 addr, be_t value) { - *(u16*)((u8*)g_base_addr + addr) = re16(value); + *(be_t*)((u8*)g_base_addr + addr) = value; } static u32 read32(u32 addr) @@ -53,11 +97,11 @@ namespace vm } } - static void write32(u32 addr, u32 value) + static void write32(u32 addr, be_t value) { if (addr < RAW_SPU_BASE_ADDR || (addr % RAW_SPU_OFFSET) < RAW_SPU_PROB_OFFSET) { - *(u32*)((u8*)g_base_addr + addr) = re32(value); + *(be_t*)((u8*)g_base_addr + addr) = value; } else { @@ -70,9 +114,24 @@ namespace vm return re64(*(u64*)((u8*)g_base_addr + addr)); } + static void write64(u32 addr, be_t value) + { + *(be_t*)((u8*)g_base_addr + addr) = value; + } + + static void write16(u32 addr, u16 value) + { + write16(addr, be_t::make(value)); + } + + static void write32(u32 addr, u32 value) + { + write32(addr, be_t::make(value)); + } + static void write64(u32 addr, u64 value) { - *(u64*)((u8*)g_base_addr + addr) = re64(value); + write64(addr, be_t::make(value)); } static u128 read128(u32 addr) @@ -88,6 
+147,8 @@ namespace vm namespace psv { + void init(); + static u8 read8(u32 addr) { return *((u8*)g_base_addr + addr); @@ -138,8 +199,17 @@ namespace vm *(u128*)((u8*)g_base_addr + addr) = value; } } + + namespace psp + { + using namespace psv; + + void init(); + } + + void close(); } #include "vm_ref.h" #include "vm_ptr.h" -#include "vm_var.h" +#include "vm_var.h" \ No newline at end of file diff --git a/rpcs3/Emu/Memory/vm_ptr.h b/rpcs3/Emu/Memory/vm_ptr.h index 7a956f9e4d..c984b18e0a 100644 --- a/rpcs3/Emu/Memory/vm_ptr.h +++ b/rpcs3/Emu/Memory/vm_ptr.h @@ -10,7 +10,7 @@ namespace vm AT m_addr; public: - typedef T type; + typedef typename std::remove_cv::type type; _ptr_base operator++ (int) { @@ -38,49 +38,49 @@ namespace vm return *this; } - _ptr_base& operator += (int count) + _ptr_base& operator += (AT count) { m_addr += count * sizeof(AT); return *this; } - _ptr_base& operator -= (int count) + _ptr_base& operator -= (AT count) { m_addr -= count * sizeof(AT); return *this; } - _ptr_base operator + (int count) const - { - return make(m_addr + count * sizeof(AT)); - } + _ptr_base operator + (typename remove_be_t::type count) const { return make(m_addr + count * sizeof(AT)); } + _ptr_base operator + (typename to_be_t::type count) const { return make(m_addr + count * sizeof(AT)); } + _ptr_base operator - (typename remove_be_t::type count) const { return make(m_addr - count * sizeof(AT)); } + _ptr_base operator - (typename to_be_t::type count) const { return make(m_addr - count * sizeof(AT)); } - _ptr_base operator - (int count) const - { - return make(m_addr - count * sizeof(AT)); - } + __forceinline bool operator <(const _ptr_base& right) const { return m_addr < right.m_addr; } + __forceinline bool operator <=(const _ptr_base& right) const { return m_addr <= right.m_addr; } + __forceinline bool operator >(const _ptr_base& right) const { return m_addr > right.m_addr; } + __forceinline bool operator >=(const _ptr_base& right) const { return m_addr >= 
right.m_addr; } + __forceinline bool operator ==(const _ptr_base& right) const { return m_addr == right.m_addr; } + __forceinline bool operator !=(const _ptr_base& right) const { return m_addr != right.m_addr; } + __forceinline bool operator ==(const nullptr_t& right) const { return m_addr == 0; } + __forceinline bool operator !=(const nullptr_t& right) const { return m_addr != 0; } + explicit operator bool() const { return m_addr != 0; } __forceinline _ptr_base::value, typename to_be_t::type, AT>>& operator *() const { return vm::get_ref<_ptr_base::value, typename to_be_t::type, AT>>>(m_addr); } - __forceinline _ptr_base::value, typename to_be_t::type, AT>>& operator [](int index) const + __forceinline _ptr_base::value, typename to_be_t::type, AT>>& operator [](AT index) const { return vm::get_ref<_ptr_base::value, typename to_be_t::type, AT>>>(m_addr + sizeof(AT)* index); } - operator bool() const - { - return m_addr != 0; - } - //typedef typename invert_be_t::type AT2; template operator const _ptr_base() const { - typename std::remove_const::type addr; addr = m_addr; + typename std::remove_const::type addr = m_addr; return (_ptr_base&)addr; } @@ -108,6 +108,8 @@ namespace vm AT m_addr; public: + typedef typename std::remove_cv::type type; + __forceinline T* const operator -> () const { return vm::get_ptr(m_addr); @@ -139,38 +141,44 @@ namespace vm return *this; } - _ptr_base& operator += (int count) + _ptr_base& operator += (AT count) { m_addr += count * sizeof(T); return *this; } - _ptr_base& operator -= (int count) + _ptr_base& operator -= (AT count) { m_addr -= count * sizeof(T); return *this; } - _ptr_base operator + (int count) const - { - return make(m_addr + count * sizeof(T)); - } - - _ptr_base operator - (int count) const - { - return make(m_addr - count * sizeof(T)); - } + _ptr_base operator + (typename remove_be_t::type count) const { return make(m_addr + count * sizeof(T)); } + _ptr_base operator + (typename to_be_t::type count) const { return 
make(m_addr + count * sizeof(T)); } + _ptr_base operator - (typename remove_be_t::type count) const { return make(m_addr - count * sizeof(T)); } + _ptr_base operator - (typename to_be_t::type count) const { return make(m_addr - count * sizeof(T)); } __forceinline T& operator *() const { - return get_ref(m_addr); + return vm::get_ref(m_addr); } - __forceinline T& operator [](int index) const + __forceinline T& operator [](AT index) const { - return get_ref(m_addr + sizeof(T) * index); + return vm::get_ref(m_addr + sizeof(AT)* index); } - + + __forceinline bool operator <(const _ptr_base& right) const { return m_addr < right.m_addr; } + __forceinline bool operator <=(const _ptr_base& right) const { return m_addr <= right.m_addr; } + __forceinline bool operator >(const _ptr_base& right) const { return m_addr > right.m_addr; } + __forceinline bool operator >=(const _ptr_base& right) const { return m_addr >= right.m_addr; } + __forceinline bool operator ==(const _ptr_base& right) const { return m_addr == right.m_addr; } + __forceinline bool operator !=(const _ptr_base& right) const { return m_addr != right.m_addr; } + __forceinline bool operator ==(const nullptr_t& right) const { return m_addr == 0; } + __forceinline bool operator !=(const nullptr_t& right) const { return m_addr != 0; } + explicit operator bool() const { return m_addr != 0; } + explicit operator T*() const { return get_ptr(); } + /* operator _ref_base() { @@ -188,26 +196,27 @@ namespace vm return m_addr; } - void set(const AT value) + void set(AT value) { m_addr = value; } - operator bool() const + /* + operator T*() const { - return m_addr != 0; + return get_ptr(); } - + */ //typedef typename invert_be_t::type AT2; template operator const _ptr_base() const { - typename std::remove_const::type addr; addr = m_addr; + typename std::remove_const::type addr = m_addr; return (_ptr_base&)addr; } - T* const get_ptr() const + T* get_ptr() const { return vm::get_ptr(m_addr); } @@ -236,29 +245,39 @@ namespace vm 
m_addr = value; } - void* const get_ptr() const + void* get_ptr() const { return vm::get_ptr(m_addr); } - operator bool() const + explicit operator void*() const { - return m_addr != 0; + return get_ptr(); } + __forceinline bool operator <(const _ptr_base& right) const { return m_addr < right.m_addr; } + __forceinline bool operator <=(const _ptr_base& right) const { return m_addr <= right.m_addr; } + __forceinline bool operator >(const _ptr_base& right) const { return m_addr > right.m_addr; } + __forceinline bool operator >=(const _ptr_base& right) const { return m_addr >= right.m_addr; } + __forceinline bool operator ==(const _ptr_base& right) const { return m_addr == right.m_addr; } + __forceinline bool operator !=(const _ptr_base& right) const { return m_addr != right.m_addr; } + __forceinline bool operator ==(const nullptr_t& right) const { return m_addr == 0; } + __forceinline bool operator !=(const nullptr_t& right) const { return m_addr != 0; } + explicit operator bool() const { return m_addr != 0; } + //typedef typename invert_be_t::type AT2; template operator const _ptr_base() const { - typename std::remove_const::type addr; addr = m_addr; + typename std::remove_const::type addr = m_addr; return (_ptr_base&)addr; } template operator const _ptr_base() const { - typename std::remove_const::type addr; addr = m_addr; + typename std::remove_const::type addr = m_addr; return (_ptr_base&)addr; } @@ -286,16 +305,26 @@ namespace vm m_addr = value; } - const void* const get_ptr() const + const void* get_ptr() const { return vm::get_ptr(m_addr); } - operator bool() const + explicit operator const void*() const { - return m_addr != 0; + return get_ptr(); } + __forceinline bool operator <(const _ptr_base& right) const { return m_addr < right.m_addr; } + __forceinline bool operator <=(const _ptr_base& right) const { return m_addr <= right.m_addr; } + __forceinline bool operator >(const _ptr_base& right) const { return m_addr > right.m_addr; } + __forceinline bool 
operator >=(const _ptr_base& right) const { return m_addr >= right.m_addr; } + __forceinline bool operator ==(const _ptr_base& right) const { return m_addr == right.m_addr; } + __forceinline bool operator !=(const _ptr_base& right) const { return m_addr != right.m_addr; } + __forceinline bool operator ==(const nullptr_t& right) const { return m_addr == 0; } + __forceinline bool operator !=(const nullptr_t& right) const { return m_addr != 0; } + explicit operator bool() const { return m_addr != 0; } + //typedef typename invert_be_t::type AT2; template @@ -335,17 +364,22 @@ namespace vm m_addr = value; } - operator bool() const - { - return m_addr != 0; - } + __forceinline bool operator <(const _ptr_base& right) const { return m_addr < right.m_addr; } + __forceinline bool operator <=(const _ptr_base& right) const { return m_addr <= right.m_addr; } + __forceinline bool operator >(const _ptr_base& right) const { return m_addr > right.m_addr; } + __forceinline bool operator >=(const _ptr_base& right) const { return m_addr >= right.m_addr; } + __forceinline bool operator ==(const _ptr_base& right) const { return m_addr == right.m_addr; } + __forceinline bool operator !=(const _ptr_base& right) const { return m_addr != right.m_addr; } + __forceinline bool operator ==(const nullptr_t& right) const { return m_addr == 0; } + __forceinline bool operator !=(const nullptr_t& right) const { return m_addr != 0; } + explicit operator bool() const { return m_addr != 0; } //typedef typename invert_be_t::type AT2; template operator const _ptr_base() const { - typename std::remove_const::type addr; addr = m_addr; + typename std::remove_const::type addr = m_addr; return (_ptr_base&)addr; } @@ -356,7 +390,7 @@ namespace vm operator const std::function() const { - typename std::remove_const::type addr; addr = m_addr; + typename std::remove_const::type addr = m_addr; return [addr](T... 
args) -> RT { return make(addr)(args...); }; } @@ -366,7 +400,7 @@ namespace vm //BE pointer to LE data template struct bptrl : public _ptr_base::type> { - static bptrl make(typename to_be_t::type addr) + static bptrl make(AT addr) { return (bptrl&)addr; } @@ -378,7 +412,7 @@ namespace vm //BE pointer to BE data template struct bptrb : public _ptr_base::type, lvl, typename to_be_t::type> { - static bptrb make(typename to_be_t::type addr) + static bptrb make(AT addr) { return (bptrb&)addr; } @@ -413,7 +447,7 @@ namespace vm namespace ps3 { - //default pointer for HLE functions (LE ptrerence to BE data) + //default pointer for HLE functions (LE pointer to BE data) template struct ptr : public lptrb { static ptr make(AT addr) @@ -425,10 +459,10 @@ namespace vm //using lptrb::operator const _ptr_base::type, lvl, AT>; }; - //default pointer for HLE structures (BE ptrerence to BE data) + //default pointer for HLE structures (BE pointer to BE data) template struct bptr : public bptrb { - static bptr make(typename to_be_t::type addr) + static bptr make(AT addr) { return (bptr&)addr; } @@ -440,7 +474,7 @@ namespace vm namespace psv { - //default pointer for HLE functions & structures (LE ptrerence to LE data) + //default pointer for HLE functions & structures (LE pointer to LE data) template struct ptr : public lptrl { static ptr make(AT addr) diff --git a/rpcs3/Emu/SysCalls/ModuleManager.h b/rpcs3/Emu/SysCalls/ModuleManager.h index 33034a9739..5d40cd35ad 100644 --- a/rpcs3/Emu/SysCalls/ModuleManager.h +++ b/rpcs3/Emu/SysCalls/ModuleManager.h @@ -10,6 +10,8 @@ class ModuleManager std::vector m_modules_funcs_list; std::vector m_mod_init; //owner of Module bool initialized; + std::unordered_map m_registered_functions; + public: ModuleManager(); ~ModuleManager(); @@ -24,4 +26,20 @@ public: u32 GetFuncNumById(u32 id); Module* GetModuleByName(const std::string& name); Module* GetModuleById(u16 id); + + void register_function(u32 fnid, u32 fstub) + { + 
m_registered_functions[fnid] = fstub; + } + + bool get_function_stub(u32 fnid, be_t& res) + { + auto f = m_registered_functions.find(fnid); + + if (f == m_registered_functions.end()) + return false; + + res = f->second; + return true; + } }; \ No newline at end of file diff --git a/rpcs3/Emu/SysCalls/Modules/cellAvconfExt.cpp b/rpcs3/Emu/SysCalls/Modules/cellAvconfExt.cpp index cbb9506377..260c8eb9bb 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellAvconfExt.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellAvconfExt.cpp @@ -16,15 +16,16 @@ int cellVideoOutGetScreenSize(u32 videoOut, vm::ptr screenSize) { cellAvconfExt->Warning("cellVideoOutGetScreenSize(videoOut=%d, screenSize_addr=0x%x)", videoOut, screenSize.addr()); - if (videoOut != CELL_VIDEO_OUT_PRIMARY) + if (!videoOut == CELL_VIDEO_OUT_PRIMARY) return CELL_VIDEO_OUT_ERROR_UNSUPPORTED_VIDEO_OUT; + //TODO: Use virtual screen size #ifdef _WIN32 HDC screen = GetDC(NULL); - float diagonal = roundf(sqrtf((powf(GetDeviceCaps(screen, HORZSIZE), 2) + powf(GetDeviceCaps(screen, VERTSIZE), 2))) * 0.0393); + float diagonal = round(sqrt((pow(GetDeviceCaps(screen, HORZSIZE), 2) + pow(GetDeviceCaps(screen, VERTSIZE), 2))) * 0.0393); #else // TODO: Linux implementation, without using wx - // float diagonal = roundf(sqrtf((powf(wxGetDisplaySizeMM().GetWidth(), 2) + powf(wxGetDisplaySizeMM().GetHeight(), 2))) * 0.0393); + // float diagonal = round(sqrt((pow(wxGetDisplaySizeMM().GetWidth(), 2) + pow(wxGetDisplaySizeMM().GetHeight(), 2))) * 0.0393); #endif if (Ini.GS3DTV.GetValue()) diff --git a/rpcs3/Emu/SysCalls/Modules/cellPamf.cpp b/rpcs3/Emu/SysCalls/Modules/cellPamf.cpp index ac359d0e1c..16a5064293 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellPamf.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellPamf.cpp @@ -77,16 +77,14 @@ int pamfStreamTypeToEsFilterId(u8 type, u8 ch, vm::ptr pEsF u8 pamfGetStreamType(vm::ptr pSelf, u8 stream) { //TODO: get stream type correctly - vm::ptr pAddr(pSelf->pAddr); - - switch (pAddr->stream_headers[stream].type) 
+ switch (pSelf->pAddr->stream_headers[stream].type) { case 0x1b: return CELL_PAMF_STREAM_TYPE_AVC; case 0xdc: return CELL_PAMF_STREAM_TYPE_ATRAC3PLUS; case 0x80: return CELL_PAMF_STREAM_TYPE_PAMF_LPCM; case 0xdd: return CELL_PAMF_STREAM_TYPE_USER_DATA; default: - cellPamf->Todo("pamfGetStreamType: unsupported stream type found(0x%x)", pAddr->stream_headers[stream].type); + cellPamf->Todo("pamfGetStreamType: unsupported stream type found(0x%x)", pSelf->pAddr->stream_headers[stream].type); return 0; } } @@ -94,18 +92,18 @@ u8 pamfGetStreamType(vm::ptr pSelf, u8 stream) u8 pamfGetStreamChannel(vm::ptr pSelf, u8 stream) { //TODO: get stream channel correctly - vm::ptr pAddr(pSelf->pAddr); - switch (pAddr->stream_headers[stream].type) + switch (pSelf->pAddr->stream_headers[stream].type) { case 0x1b: - if ((pAddr->stream_headers[stream].stream_id >= 0xe0) && (pAddr->stream_headers[stream].stream_id <= 0xef)) + if ((pSelf->pAddr->stream_headers[stream].stream_id >= 0xe0) && (pSelf->pAddr->stream_headers[stream].stream_id <= 0xef)) { - return pAddr->stream_headers[stream].stream_id - 0xe0; + return pSelf->pAddr->stream_headers[stream].stream_id - 0xe0; } else { - cellPamf->Error("pamfGetStreamChannel: stream type 0x%x got invalid stream id=0x%x", pAddr->stream_headers[stream].type, pAddr->stream_headers[stream].stream_id); + cellPamf->Error("pamfGetStreamChannel: stream type 0x%x got invalid stream id=0x%x", + pSelf->pAddr->stream_headers[stream].type, pSelf->pAddr->stream_headers[stream].stream_id); return 0; } case 0xdc: @@ -118,7 +116,7 @@ u8 pamfGetStreamChannel(vm::ptr pSelf, u8 stream) cellPamf->Todo("pamfGetStreamChannel: CELL_PAMF_STREAM_TYPE_USER_DATA"); return 0; default: - cellPamf->Todo("pamfGetStreamType: unsupported stream type found(0x%x)", pAddr->stream_headers[stream].type); + cellPamf->Todo("pamfGetStreamType: unsupported stream type found(0x%x)", pSelf->pAddr->stream_headers[stream].type); return 0; } @@ -200,10 +198,8 @@ int 
cellPamfReaderGetPresentationStartTime(vm::ptr pSelf, vm::pt return CELL_PAMF_ERROR_INVALID_PAMF; } - vm::ptr pAddr(pSelf->pAddr); - const u32 upper = (u16)pAddr->start_pts_high; - pTimeStamp->upper = upper; - pTimeStamp->lower = pAddr->start_pts_low; + pTimeStamp->upper = (u32)(u16)pSelf->pAddr->start_pts_high; + pTimeStamp->lower = pSelf->pAddr->start_pts_low; return CELL_OK; } @@ -215,10 +211,8 @@ int cellPamfReaderGetPresentationEndTime(vm::ptr pSelf, vm::ptr< return CELL_PAMF_ERROR_INVALID_PAMF; } - vm::ptr pAddr(pSelf->pAddr); - const u32 upper = (u16)pAddr->end_pts_high; - pTimeStamp->upper = upper; - pTimeStamp->lower = pAddr->end_pts_low; + pTimeStamp->upper = (u32)(u16)pSelf->pAddr->end_pts_high; + pTimeStamp->lower = pSelf->pAddr->end_pts_low; return CELL_OK; } @@ -230,8 +224,7 @@ int cellPamfReaderGetMuxRateBound(vm::ptr pSelf) return CELL_PAMF_ERROR_INVALID_PAMF; } - vm::ptr pAddr(pSelf->pAddr); - return pAddr->mux_rate_max; + return pSelf->pAddr->mux_rate_max; } int cellPamfReaderGetNumberOfStreams(vm::ptr pSelf) @@ -242,8 +235,7 @@ int cellPamfReaderGetNumberOfStreams(vm::ptr pSelf) return CELL_PAMF_ERROR_INVALID_PAMF; } - vm::ptr pAddr(pSelf->pAddr); - return pAddr->stream_count; + return pSelf->pAddr->stream_count; } int cellPamfReaderGetNumberOfSpecificStreams(vm::ptr pSelf, u8 streamType) @@ -254,11 +246,9 @@ int cellPamfReaderGetNumberOfSpecificStreams(vm::ptr pSelf, u8 s return CELL_PAMF_ERROR_INVALID_PAMF; } - vm::ptr pAddr(pSelf->pAddr); - int counts[6] = {0, 0, 0, 0, 0, 0}; - for (u8 i = 0; i < pAddr->stream_count; i++) + for (u8 i = 0; i < pSelf->pAddr->stream_count; i++) { counts[pamfGetStreamType(pSelf, i)]++; } @@ -289,18 +279,14 @@ int cellPamfReaderSetStreamWithIndex(vm::ptr pSelf, u8 streamInd return CELL_PAMF_ERROR_INVALID_PAMF; } - vm::ptr pAddr(pSelf->pAddr); - - if (streamIndex < pAddr->stream_count) + if (streamIndex < pSelf->pAddr->stream_count) { pSelf->stream = streamIndex; return CELL_OK; } - else - { - 
cellPamf->Error("cellPamfReaderSetStreamWithIndex: CELL_PAMF_ERROR_INVALID_ARG"); - return CELL_PAMF_ERROR_INVALID_ARG; - } + + cellPamf->Error("cellPamfReaderSetStreamWithIndex: CELL_PAMF_ERROR_INVALID_ARG"); + return CELL_PAMF_ERROR_INVALID_ARG; } int cellPamfReaderSetStreamWithTypeAndChannel(vm::ptr pSelf, u8 streamType, u8 ch) @@ -311,8 +297,6 @@ int cellPamfReaderSetStreamWithTypeAndChannel(vm::ptr pSelf, u8 return CELL_PAMF_ERROR_INVALID_PAMF; } - vm::ptr pAddr(pSelf->pAddr); - if (streamType > 5) { cellPamf->Error("cellPamfReaderSetStreamWithTypeAndChannel: invalid stream type(%d)", streamType); @@ -320,7 +304,7 @@ int cellPamfReaderSetStreamWithTypeAndChannel(vm::ptr pSelf, u8 return CELL_PAMF_ERROR_INVALID_ARG; } - for (u8 i = 0; i < pAddr->stream_count; i++) + for (u8 i = 0; i < pSelf->pAddr->stream_count; i++) { if (pamfGetStreamType(pSelf, i) == streamType) { @@ -343,11 +327,9 @@ int cellPamfReaderSetStreamWithTypeAndIndex(vm::ptr pSelf, u8 st return CELL_PAMF_ERROR_INVALID_PAMF; } - vm::ptr pAddr(pSelf->pAddr); - u32 found = 0; - for (u8 i = 0; i < pAddr->stream_count; i++) + for (u8 i = 0; i < pSelf->pAddr->stream_count; i++) { const u8 type = pamfGetStreamType(pSelf, i); @@ -426,8 +408,6 @@ int cellPamfReaderGetStreamInfo(vm::ptr pSelf, u32 pInfo_addr, u return CELL_PAMF_ERROR_INVALID_PAMF; } - vm::ptr pAddr(pSelf->pAddr); - memset(vm::get_ptr(pInfo_addr), 0, size); switch (pamfGetStreamType(pSelf, pSelf->stream)) @@ -486,7 +466,7 @@ int cellPamfReaderGetStreamInfo(vm::ptr pSelf, u32 pInfo_addr, u case CELL_PAMF_STREAM_TYPE_AC3: { auto pInfo = vm::ptr::make(pInfo_addr); - auto pAudio = vm::ptr::make(pSelf->pAddr + 0x98 + pSelf->stream * 0x30); + auto pAudio = vm::ptr::make(pSelf->pAddr.addr() + 0x98 + pSelf->stream * 0x30); if (size != sizeof(CellPamfAc3Info)) { @@ -501,7 +481,7 @@ int cellPamfReaderGetStreamInfo(vm::ptr pSelf, u32 pInfo_addr, u case CELL_PAMF_STREAM_TYPE_PAMF_LPCM: { auto pInfo = vm::ptr::make(pInfo_addr); - auto pAudio = 
vm::ptr::make(pSelf->pAddr + 0x98 + pSelf->stream * 0x30); + auto pAudio = vm::ptr::make(pSelf->pAddr.addr() + 0x98 + pSelf->stream * 0x30); if (size != sizeof(CellPamfLpcmInfo)) { @@ -537,8 +517,7 @@ int cellPamfReaderGetNumberOfEp(vm::ptr pSelf) return CELL_PAMF_ERROR_INVALID_PAMF; } - vm::ptr pAddr(pSelf->pAddr); - return pAddr->stream_headers[pSelf->stream].ep_num; + return pSelf->pAddr->stream_headers[pSelf->stream].ep_num; } int cellPamfReaderGetEpIteratorWithIndex(vm::ptr pSelf, u32 epIndex, vm::ptr pIt) @@ -549,7 +528,6 @@ int cellPamfReaderGetEpIteratorWithIndex(vm::ptr pSelf, u32 epIn return CELL_PAMF_ERROR_INVALID_PAMF; } - vm::ptr pAddr(pSelf->pAddr); //TODO: return CELL_OK; } @@ -562,8 +540,6 @@ int cellPamfReaderGetEpIteratorWithTimeStamp(vm::ptr pSelf, vm:: return CELL_PAMF_ERROR_INVALID_PAMF; } - vm::ptr pAddr(pSelf->pAddr); - //TODO: return CELL_OK; diff --git a/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp b/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp index 78c7054e82..86db920f9f 100644 --- a/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp +++ b/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp @@ -14,7 +14,7 @@ #include "Emu/SysCalls/lv2/sys_time.h" #include "Emu/SysCalls/lv2/sys_mmapper.h" #include "Emu/SysCalls/lv2/sys_lwcond.h" -#include "Loader/ELF.h" +#include "Loader/ELF32.h" #include "Crypto/unself.h" #include "Emu/Cell/RawSPUThread.h" #include "sysPrxForUser.h" @@ -130,11 +130,10 @@ int sys_raw_spu_load(s32 id, vm::ptr path, vm::ptr entry) f.Seek(0); - ELFLoader l(f); - l.LoadInfo(); - l.LoadData(RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * id); + u32 _entry; + LoadSpuImage(f, _entry, RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * id); - *entry = l.GetEntry(); + *entry = _entry; return CELL_OK; } diff --git a/rpcs3/Emu/SysCalls/Modules/sys_net.cpp b/rpcs3/Emu/SysCalls/Modules/sys_net.cpp index 90e781ead1..9f550557c1 100644 --- a/rpcs3/Emu/SysCalls/Modules/sys_net.cpp +++ b/rpcs3/Emu/SysCalls/Modules/sys_net.cpp @@ -96,7 +96,7 @@ int sys_net_accept(s32 s, vm::ptr 
addr, vm::ptr pad { sys_net->Warning("accept(s=%d, family_addr=0x%x, paddrlen=0x%x)", s, addr.addr(), paddrlen.addr()); if (!addr) { - int ret = accept(s, NULL, NULL); + int ret = accept(s, nullptr, nullptr); *g_lastError = getLastError(); return ret; } diff --git a/rpcs3/Emu/SysCalls/lv2/sys_mmapper.cpp b/rpcs3/Emu/SysCalls/lv2/sys_mmapper.cpp index 53891b7565..ba0242383d 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_mmapper.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_mmapper.cpp @@ -24,13 +24,13 @@ s32 sys_mmapper_allocate_address(u32 size, u64 flags, u32 alignment, u32 alloc_a { default: case SYS_MEMORY_PAGE_SIZE_1M: - if(AlignAddr(size, alignment) & 0xfffff) + if(align(size, alignment) & 0xfffff) return CELL_EALIGN; addr = (u32)Memory.Alloc(size, 0x100000); break; case SYS_MEMORY_PAGE_SIZE_64K: - if(AlignAddr(size, alignment) & 0xffff) + if (align(size, alignment) & 0xffff) return CELL_EALIGN; addr = (u32)Memory.Alloc(size, 0x10000); break; diff --git a/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp b/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp index fb7772db2e..3249dd6cd4 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp @@ -160,11 +160,11 @@ PPUThread* ppu_thread_create(u32 entry, u64 arg, s32 prio, u32 stacksize, bool i new_thread.m_has_interrupt = false; new_thread.m_is_interrupt = is_interrupt; new_thread.SetName(name); - new_thread.m_custom_task = task; + new_thread.custom_task = task; sys_ppu_thread.Notice("*** New PPU Thread [%s] (%s, entry=0x%x): id = %d", name.c_str(), is_interrupt ? "interrupt" : - (is_joinable ? "joinable" : "non-joinable"), entry, id); + (is_joinable ? 
"joinable" : "detached"), entry, id); if (!is_interrupt) { @@ -194,15 +194,13 @@ s32 sys_ppu_thread_create(vm::ptr thread_id, u32 entry, u64 arg, s32 prio, { case 0: break; case SYS_PPU_THREAD_CREATE_JOINABLE: - { is_joinable = true; break; - } + case SYS_PPU_THREAD_CREATE_INTERRUPT: - { is_interrupt = true; break; - } + default: sys_ppu_thread.Error("sys_ppu_thread_create(): unknown flags value (0x%llx)", flags); return CELL_EPERM; } diff --git a/rpcs3/Emu/SysCalls/lv2/sys_prx.cpp b/rpcs3/Emu/SysCalls/lv2/sys_prx.cpp index 3f1ff87e22..efdb47f96c 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_prx.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_prx.cpp @@ -34,10 +34,10 @@ s32 sys_prx_load_module(vm::ptr path, u64 flags, vm::ptrsize = (u32)f.GetSize(); prx->address = (u32)Memory.Alloc(prx->size, 4); - prx->path = path; + prx->path = (const char*)path; // Load the PRX into memory - f.Read(vm::get_ptr(prx->address), prx->size); + f.Read(vm::get_ptr(prx->address), prx->size); u32 id = sys_prx.GetNewId(prx, TYPE_PRX); return id; diff --git a/rpcs3/Emu/SysCalls/lv2/sys_prx.h b/rpcs3/Emu/SysCalls/lv2/sys_prx.h index e83daad0be..2e024c1bde 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_prx.h +++ b/rpcs3/Emu/SysCalls/lv2/sys_prx.h @@ -28,11 +28,74 @@ enum CELL_PRX_ERROR_ELF_IS_REGISTERED = 0x80011910, // Fixed ELF is already registered }; +// Information about imported or exported libraries in PRX modules +struct sys_prx_library_info_t +{ + u8 size; + u8 unk0; + be_t version; + be_t attributes; + be_t num_func; + be_t num_var; + be_t num_tlsvar; + u8 info_hash; + u8 info_tlshash; + u8 unk1[2]; + be_t name_addr; + be_t fnid_addr; + be_t fstub_addr; + be_t unk4; + be_t unk5; + be_t unk6; + be_t unk7; +}; + +// ELF file headers +struct sys_prx_param_t +{ + be_t size; + be_t magic; + be_t version; + be_t unk0; + be_t libentstart; + be_t libentend; + vm::bptr libstubstart; + vm::bptr libstubend; + be_t ver; + be_t unk1; + be_t unk2; +}; + +// PRX file headers +struct sys_prx_module_info_t +{ + be_t 
attributes; + be_t version; + s8 name[28]; + be_t toc; + vm::bptr exports_start; + vm::bptr exports_end; + be_t imports_start; + be_t imports_end; +}; + +// Relocation information of the SCE_PPURELA segment +struct sys_prx_relocation_info_t +{ + be_t offset; + be_t unk0; + u8 index_value; + u8 index_addr; + be_t type; + vm::bptr ptr; +}; + + // Data types struct sys_prx_load_module_option_t { be_t size; - be_t base_addr; // void* + vm::bptr base_addr; }; struct sys_prx_start_module_option_t diff --git a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp index 4c7163f3d2..1f6a00bbdf 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp @@ -7,20 +7,26 @@ #include "Emu/Cell/RawSPUThread.h" #include "Emu/FS/vfsStreamMemory.h" #include "Emu/FS/vfsFile.h" -#include "Loader/ELF.h" +#include "Loader/elf32.h" #include "Crypto/unself.h" #include "sys_spu.h" static SysCallBase sys_spu("sys_spu"); +void LoadSpuImage(vfsStream& stream, u32& spu_ep, u32 addr) +{ + loader::handlers::elf32 h; + h.init(stream); + h.load_data(addr); + spu_ep = h.m_ehdr.data_be.e_entry; +} + u32 LoadSpuImage(vfsStream& stream, u32& spu_ep) { - ELFLoader l(stream); - l.LoadInfo(); const u32 alloc_size = 256 * 1024; - u32 spu_offset = (u32)Memory.MainMem.AllocAlign(alloc_size); - l.LoadData(spu_offset); - spu_ep = l.GetEntry(); + u32 spu_offset = (u32)vm::alloc(alloc_size, vm::main); + + LoadSpuImage(stream, spu_ep, spu_offset); return spu_offset; } diff --git a/rpcs3/Emu/SysCalls/lv2/sys_spu.h b/rpcs3/Emu/SysCalls/lv2/sys_spu.h index e9886b0e0d..a07e6d0ca3 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_spu.h +++ b/rpcs3/Emu/SysCalls/lv2/sys_spu.h @@ -154,6 +154,10 @@ struct SpuGroupInfo }; class SPUThread; +struct vfsStream; + +void LoadSpuImage(vfsStream& stream, u32& spu_ep, u32 addr); +u32 LoadSpuImage(vfsStream& stream, u32& spu_ep); // Aux s32 spu_image_import(sys_spu_image& img, u32 src, u32 type); diff --git a/rpcs3/Emu/System.cpp 
b/rpcs3/Emu/System.cpp index e6c3ff9e3f..7c6919724f 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -57,6 +57,8 @@ Emulator::Emulator() , m_sync_prim_manager(new SyncPrimManager()) , m_vfs(new VFS()) { + m_loader.register_handler(new loader::handlers::elf32); + m_loader.register_handler(new loader::handlers::elf64); } Emulator::~Emulator() @@ -235,78 +237,13 @@ void Emulator::Load() return; } - bool is_error; - Loader l(f); - - try + if (!m_loader.load(f)) { - if(!(is_error = !l.Analyze())) - { - switch(l.GetMachine()) - { - case MACHINE_SPU: - Memory.Init(Memory_PS3); - Memory.MainMem.AllocFixed(Memory.MainMem.GetStartAddr(), 0x40000); - break; - - case MACHINE_PPC64: - Memory.Init(Memory_PS3); - break; - - case MACHINE_MIPS: - Memory.Init(Memory_PSP); - break; - - case MACHINE_ARM: - Memory.Init(Memory_PSV); - break; - - default: - throw std::string("Unknown machine!"); - } - - is_error = !l.Load(); - } - - } - catch(const std::string& e) - { - LOG_ERROR(LOADER, "%s", e.c_str()); - is_error = true; - } - catch(...) 
- { - LOG_ERROR(LOADER, "Unhandled loader error."); - is_error = true; - } - - CPUThreadType thread_type; - - if(!is_error) - { - switch(l.GetMachine()) - { - case MACHINE_PPC64: thread_type = CPU_THREAD_PPU; break; - case MACHINE_SPU: thread_type = CPU_THREAD_SPU; break; - case MACHINE_ARM: thread_type = CPU_THREAD_ARMv7; break; - - default: - LOG_ERROR(LOADER, "Unimplemented thread type for machine."); - is_error = true; - break; - } - } - - if(is_error) - { - Memory.Close(); - Stop(); + LOG_ERROR(LOADER, "Loading '%s' failed", m_elf_path.c_str()); + vm::close(); return; } - // setting default values - Emu.m_sdk_version = -1; // possibly "unknown" value - // trying to load some info from PARAM.SFO vfsFile f2("/app_home/PARAM.SFO"); if (f2.IsOpened()) @@ -326,83 +263,6 @@ void Emulator::Load() LoadPoints(BreakPointsDBName); - CPUThread& thread = GetCPU().AddThread(thread_type); - - switch(l.GetMachine()) - { - case MACHINE_SPU: - LOG_NOTICE(LOADER, "offset = 0x%llx", Memory.MainMem.GetStartAddr()); - LOG_NOTICE(LOADER, "max addr = 0x%x", l.GetMaxAddr()); - thread.SetOffset(Memory.MainMem.GetStartAddr()); - thread.SetEntry(l.GetEntry() - Memory.MainMem.GetStartAddr()); - thread.Run(); - break; - - case MACHINE_PPC64: - { - m_rsx_callback = (u32)Memory.MainMem.AllocAlign(4 * 4) + 4; - vm::write32(m_rsx_callback - 4, m_rsx_callback); - - auto callback_data = vm::ptr::make(m_rsx_callback); - callback_data[0] = ADDI(11, 0, 0x3ff); - callback_data[1] = SC(2); - callback_data[2] = BCLR(0x10 | 0x04, 0, 0, 0); - - m_ppu_thr_exit = (u32)Memory.MainMem.AllocAlign(4 * 4); - - auto ppu_thr_exit_data = vm::ptr::make(m_ppu_thr_exit); - //ppu_thr_exit_data += ADDI(3, 0, 0); // why it kills return value (GPR[3]) ? 
- ppu_thr_exit_data[0] = ADDI(11, 0, 41); - ppu_thr_exit_data[1] = SC(2); - ppu_thr_exit_data[2] = BCLR(0x10 | 0x04, 0, 0, 0); - - m_ppu_thr_stop = (u32)Memory.MainMem.AllocAlign(2 * 4); - - auto ppu_thr_stop_data = vm::ptr::make(m_ppu_thr_stop); - ppu_thr_stop_data[0] = SC(4); - ppu_thr_stop_data[1] = BCLR(0x10 | 0x04, 0, 0, 0); - - vm::write64(Memory.PRXMem.AllocAlign(0x10000), 0xDEADBEEFABADCAFE); - - thread.SetEntry(l.GetEntry()); - thread.SetStackSize(0x10000); - thread.SetPrio(0x50); - thread.Run(); - - u32 arg1 = Memory.MainMem.AllocAlign(m_elf_path.size() + 1 + 0x20, 0x10) + 0x20; - memcpy(vm::get_ptr(arg1), m_elf_path.c_str(), m_elf_path.size() + 1); - u32 argv = arg1 - 0x20; - vm::write64(argv, arg1); - - static_cast(thread).GPR[3] = 1; // arg count - static_cast(thread).GPR[4] = argv; // probably, args** - static_cast(thread).GPR[5] = argv + 0x10; // unknown - static_cast(thread).GPR[6] = 0; // unknown - static_cast(thread).GPR[12] = Emu.GetMallocPageSize(); // ??? - //thread.AddArgv("-emu"); - } - break; - - case MACHINE_ARM: - { - u32 entry = l.GetEntry(); - - auto code = vm::psv::ptr::make(entry & ~3); - - // evil way to find entry point in .sceModuleInfo.rodata - while (code[0] != 0xffffffffu) - { - entry = code[0] + 0x81000000; - code++; - } - - thread.SetEntry(entry & ~1); - thread.Run(); - - break; - } - } - m_status = Ready; GetGSManager().Init(); @@ -415,7 +275,6 @@ void Emulator::Load() void Emulator::Run() { - if(!IsReady()) { Load(); diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h index 05c8927eee..7a7aacff96 100644 --- a/rpcs3/Emu/System.h +++ b/rpcs3/Emu/System.h @@ -47,7 +47,8 @@ public: proc_param.malloc_pagesize = 0x100000; proc_param.sdk_version = 0x360001; - //TODO + proc_param.primary_stacksize = 0x100000; + proc_param.primary_prio = 0x50; } void SetTLSData(const u64 addr, const u64 filesz, const u64 memsz) @@ -106,6 +107,7 @@ class Emulator VFS* m_vfs; EmuInfo m_info; + loader::loader m_loader; public: std::string m_path; @@ 
-121,6 +123,11 @@ public: void SetPath(const std::string& path, const std::string& elf_path = ""); void SetTitleID(const std::string& id); + std::string GetPath() const + { + return m_elf_path; + } + std::recursive_mutex& GetCoreMutex() { return m_core_mutex; } CPUThreadManager& GetCPU() { return *m_thread_manager; } @@ -149,6 +156,21 @@ public: m_info.SetTLSData(addr, filesz, memsz); } + void SetRSXCallback(u32 addr) + { + m_rsx_callback = addr; + } + + void SetPPUThreadExit(u32 addr) + { + m_ppu_thr_exit = addr; + } + + void SetPPUThreadStop(u32 addr) + { + m_ppu_thr_stop = addr; + } + EmuInfo& GetInfo() { return m_info; } u64 GetTLSAddr() const { return m_info.GetTLSAddr(); } diff --git a/rpcs3/Gui/CompilerELF.cpp b/rpcs3/Gui/CompilerELF.cpp index c4254163e2..0dcd836eca 100644 --- a/rpcs3/Gui/CompilerELF.cpp +++ b/rpcs3/Gui/CompilerELF.cpp @@ -421,5 +421,5 @@ void CompilerELF::SetOpStyle(const std::string& text, const wxColour& color, boo void CompilerELF::DoAnalyzeCode(bool compile) { - CompilePPUProgram(fmt::ToUTF8(asm_list->GetValue()), "compiled.elf", asm_list, hex_list, err_list, !compile).Compile(); +// CompilePPUProgram(fmt::ToUTF8(asm_list->GetValue()), "compiled.elf", asm_list, hex_list, err_list, !compile).Compile(); } diff --git a/rpcs3/Gui/DisAsmFrame.cpp b/rpcs3/Gui/DisAsmFrame.cpp index e4c4fe2c17..c7c52acff9 100644 --- a/rpcs3/Gui/DisAsmFrame.cpp +++ b/rpcs3/Gui/DisAsmFrame.cpp @@ -1,4 +1,5 @@ #include "stdafx_gui.h" +#if 0 #include "Utilities/Log.h" #include "Emu/Memory/Memory.h" #include "Emu/System.h" @@ -512,3 +513,4 @@ void DisAsmFrame::MouseWheel(wxMouseEvent& event) event.Skip(); } +#endif \ No newline at end of file diff --git a/rpcs3/Gui/DisAsmFrame.h b/rpcs3/Gui/DisAsmFrame.h index 0f9f30b271..4e2bf0b3b9 100644 --- a/rpcs3/Gui/DisAsmFrame.h +++ b/rpcs3/Gui/DisAsmFrame.h @@ -1,5 +1,5 @@ #pragma once - +#if 0 class PPCThread; class DisAsmFrame : public wxFrame @@ -31,4 +31,6 @@ public: } virtual void AddLine(const wxString line); -}; \ No 
newline at end of file +}; + +#endif \ No newline at end of file diff --git a/rpcs3/Gui/LLEModulesManager.cpp b/rpcs3/Gui/LLEModulesManager.cpp new file mode 100644 index 0000000000..387e0e6433 --- /dev/null +++ b/rpcs3/Gui/LLEModulesManager.cpp @@ -0,0 +1,65 @@ +#include "stdafx_gui.h" +#include "Utilities/Log.h" +#include "Loader/ELF64.h" +#include "Emu/FS/vfsDir.h" +#include "Emu/FS/vfsFile.h" +#include "LLEModulesManager.h" +#include "Emu/System.h" +#include "Emu/FS/VFS.h" + +LLEModulesManagerFrame::LLEModulesManagerFrame(wxWindow* parent) : FrameBase(parent, wxID_ANY, "", "LLEModulesManagerFrame", wxSize(800, 600)) +{ + wxBoxSizer *s_panel = new wxBoxSizer(wxVERTICAL); + wxBoxSizer *s_p_panel = new wxBoxSizer(wxVERTICAL); + wxPanel *p_main = new wxPanel(this); + m_check_list = new wxCheckListBox(p_main, wxID_ANY); + s_p_panel->Add(m_check_list, 0, wxEXPAND | wxALL, 5); + p_main->SetSizerAndFit(s_p_panel); + s_panel->Add(p_main, 0, wxEXPAND | wxALL, 5); + SetSizerAndFit(s_panel); + + Refresh(); + //Bind(wxEVT_CHECKLISTBOX, [this](wxCommandEvent& event) { UpdateSelection(); }); + Bind(wxEVT_SIZE, [p_main, this](wxSizeEvent& event) { p_main->SetSize(GetClientSize()); m_check_list->SetSize(p_main->GetClientSize() - wxSize(10, 10)); }); +} + +void LLEModulesManagerFrame::Refresh() +{ + m_check_list->Clear(); + + std::string path = "/dev_flash/sys/external/"; + + Emu.GetVFS().Init(path); + + vfsDir dir(path); + + loader::handlers::elf64 sprx_loader; + for (const DirEntryInfo* info = dir.Read(); info; info = dir.Read()) + { + if (info->flags & DirEntry_TypeFile) + { + vfsFile f(path + info->name); + if (sprx_loader.init(f) != loader::handler::ok) + { + continue; + } + + if (!sprx_loader.is_sprx()) + { + continue; + } + + sprx_loader.load(); + + m_check_list->Check(m_check_list->Append(sprx_loader.sprx_get_module_name() + + " v" + std::to_string((int)sprx_loader.m_sprx_module_info.version[0]) + "." 
+ std::to_string((int)sprx_loader.m_sprx_module_info.version[1]))); + } + } + + Emu.GetVFS().UnMountAll(); +} + +void LLEModulesManagerFrame::UpdateSelection() +{ + +} \ No newline at end of file diff --git a/rpcs3/Gui/LLEModulesManager.h b/rpcs3/Gui/LLEModulesManager.h new file mode 100644 index 0000000000..f6344bba3b --- /dev/null +++ b/rpcs3/Gui/LLEModulesManager.h @@ -0,0 +1,13 @@ +#pragma once +#include "Gui/FrameBase.h" +#include + +class LLEModulesManagerFrame : public FrameBase +{ + wxCheckListBox *m_check_list; + +public: + LLEModulesManagerFrame(wxWindow *parent); + void Refresh(); + void UpdateSelection(); +}; \ No newline at end of file diff --git a/rpcs3/Gui/MainFrame.cpp b/rpcs3/Gui/MainFrame.cpp index b3e5da35ad..a21acdb18d 100644 --- a/rpcs3/Gui/MainFrame.cpp +++ b/rpcs3/Gui/MainFrame.cpp @@ -20,6 +20,7 @@ #include "Gui/KernelExplorer.h" #include "Gui/MemoryViewer.h" #include "Gui/RSXDebugger.h" +#include "Gui/LLEModulesManager.h" #include @@ -44,6 +45,7 @@ enum IDs id_config_vhdd_manager, id_config_autopause_manager, id_config_savedata_manager, + id_config_lle_modules_manager, id_tools_compiler, id_tools_kernel_explorer, id_tools_memory_viewer, @@ -98,6 +100,8 @@ MainFrame::MainFrame() menu_conf->Append(id_config_vfs_manager, "Virtual File System Manager"); menu_conf->Append(id_config_vhdd_manager, "Virtual HDD Manager"); menu_conf->Append(id_config_savedata_manager, "Save Data Utility"); + menu_conf->Append(id_config_lle_modules_manager, "LLE Modules Manager"); + wxMenu* menu_tools = new wxMenu(); menubar->Append(menu_tools, "Tools"); @@ -137,6 +141,7 @@ MainFrame::MainFrame() Bind(wxEVT_MENU, &MainFrame::ConfigVHDD, this, id_config_vhdd_manager); Bind(wxEVT_MENU, &MainFrame::ConfigAutoPause, this, id_config_autopause_manager); Bind(wxEVT_MENU, &MainFrame::ConfigSaveData, this, id_config_savedata_manager); + Bind(wxEVT_MENU, &MainFrame::ConfigLLEModules, this, id_config_lle_modules_manager); Bind(wxEVT_MENU, &MainFrame::OpenELFCompiler, this, 
id_tools_compiler); Bind(wxEVT_MENU, &MainFrame::OpenKernelExplorer, this, id_tools_kernel_explorer); @@ -673,6 +678,11 @@ void MainFrame::ConfigSaveData(wxCommandEvent& event) SaveDataListDialog(this, true).ShowModal(); } +void MainFrame::ConfigLLEModules(wxCommandEvent& event) +{ + (new LLEModulesManagerFrame(this))->Show(); +} + void MainFrame::OpenELFCompiler(wxCommandEvent& WXUNUSED(event)) { (new CompilerELF(this)) -> Show(); diff --git a/rpcs3/Gui/MainFrame.h b/rpcs3/Gui/MainFrame.h index 3e1b625048..359e932c6a 100644 --- a/rpcs3/Gui/MainFrame.h +++ b/rpcs3/Gui/MainFrame.h @@ -40,6 +40,7 @@ private: void ConfigVHDD(wxCommandEvent& event); void ConfigAutoPause(wxCommandEvent& event); void ConfigSaveData(wxCommandEvent& event); + void ConfigLLEModules(wxCommandEvent& event); void OpenELFCompiler(wxCommandEvent& evt); void OpenKernelExplorer(wxCommandEvent& evt); void OpenMemoryViewer(wxCommandEvent& evt); diff --git a/rpcs3/Loader/ELF.cpp b/rpcs3/Loader/ELF.cpp deleted file mode 100644 index b3708bf910..0000000000 --- a/rpcs3/Loader/ELF.cpp +++ /dev/null @@ -1,63 +0,0 @@ -#include "stdafx.h" -#include "Emu/FS/vfsStream.h" -#include "ELF.h" -#include "ELF64.h" -#include "ELF32.h" - -void Elf_Ehdr::Show() -{ -} - -void Elf_Ehdr::Load(vfsStream& f) -{ - e_magic = Read32(f); - e_class = Read8(f); -} - -ELFLoader::ELFLoader(vfsStream& f) - : m_elf_file(f) - , LoaderBase() - , m_loader(nullptr) -{ -} - -bool ELFLoader::LoadInfo() -{ - if(!m_elf_file.IsOpened()) - return false; - - m_elf_file.Seek(0); - ehdr.Load(m_elf_file); - if(!ehdr.CheckMagic()) - return false; - - switch(ehdr.GetClass()) - { - case CLASS_ELF32: - m_loader = new ELF32Loader(m_elf_file); - break; - case CLASS_ELF64: - m_loader = new ELF64Loader(m_elf_file); - break; - } - - if(!(m_loader && m_loader->LoadInfo())) - return false; - - entry = m_loader->GetEntry(); - machine = m_loader->GetMachine(); - - return true; -} - -bool ELFLoader::LoadData(u64 offset) -{ - return m_loader && 
m_loader->LoadData(offset); -} - -bool ELFLoader::Close() -{ - delete m_loader; - m_loader = nullptr; - return m_elf_file.Close(); -} diff --git a/rpcs3/Loader/ELF.h b/rpcs3/Loader/ELF.h deleted file mode 100644 index 44b888b9ff..0000000000 --- a/rpcs3/Loader/ELF.h +++ /dev/null @@ -1,53 +0,0 @@ -#pragma once -#include "Loader.h" - -struct vfsStream; - -enum ElfClass -{ - CLASS_Unknown, - CLASS_ELF32, - CLASS_ELF64, -}; - -struct Elf_Ehdr -{ - u32 e_magic; - u8 e_class; - - virtual void Show(); - - virtual void Load(vfsStream& f); - - bool CheckMagic() const { return e_magic == 0x7F454C46; } - - ElfClass GetClass() const - { - switch(e_class) - { - case 1: return CLASS_ELF32; - case 2: return CLASS_ELF64; - } - - return CLASS_Unknown; - } -}; - -class ELFLoader : public LoaderBase -{ - vfsStream& m_elf_file; - LoaderBase* m_loader; - -public: - Elf_Ehdr ehdr; - - ELFLoader(vfsStream& f); - virtual ~ELFLoader() - { - Close(); - } - - virtual bool LoadInfo(); - virtual bool LoadData(u64 offset = 0); - virtual bool Close(); -}; \ No newline at end of file diff --git a/rpcs3/Loader/ELF32.cpp b/rpcs3/Loader/ELF32.cpp index 1d35143e01..aa8d7d2dd3 100644 --- a/rpcs3/Loader/ELF32.cpp +++ b/rpcs3/Loader/ELF32.cpp @@ -3,623 +3,127 @@ #include "Utilities/rFile.h" #include "Emu/FS/vfsStream.h" #include "Emu/Memory/Memory.h" -#include "Emu/ARMv7/PSVFuncList.h" #include "ELF32.h" +#include "Emu/Cell/SPUThread.h" +#include "Emu/ARMv7/ARMv7Thread.h" +#include "Emu/System.h" -//#define LOADER_DEBUG - -void Elf32_Ehdr::Show() +namespace loader { -#ifdef LOADER_DEBUG - LOG_NOTICE(LOADER, "Magic: %08x", e_magic); - LOG_NOTICE(LOADER, "Class: %s", "ELF32"); - LOG_NOTICE(LOADER, "Data: %s", Ehdr_DataToString(e_data).c_str()); - LOG_NOTICE(LOADER, "Current Version: %d", e_curver); - LOG_NOTICE(LOADER, "OS/ABI: %s", Ehdr_OS_ABIToString(e_os_abi).c_str()); - LOG_NOTICE(LOADER, "ABI version: %lld", e_abi_ver); - LOG_NOTICE(LOADER, "Type: %s", Ehdr_TypeToString(e_type).c_str()); - 
LOG_NOTICE(LOADER, "Machine: %s", Ehdr_MachineToString(e_machine).c_str()); - LOG_NOTICE(LOADER, "Version: %d", e_version); - LOG_NOTICE(LOADER, "Entry point address: 0x%x", e_entry); - LOG_NOTICE(LOADER, "Program headers offset: 0x%08x", e_phoff); - LOG_NOTICE(LOADER, "Section headers offset: 0x%08x", e_shoff); - LOG_NOTICE(LOADER, "Flags: 0x%x", e_flags); - LOG_NOTICE(LOADER, "Size of this header: %d", e_ehsize); - LOG_NOTICE(LOADER, "Size of program headers: %d", e_phentsize); - LOG_NOTICE(LOADER, "Number of program headers: %d", e_phnum); - LOG_NOTICE(LOADER, "Size of section headers: %d", e_shentsize); - LOG_NOTICE(LOADER, "Number of section headers: %d", e_shnum); - LOG_NOTICE(LOADER, "Section header string table index: %d", e_shstrndx); -#endif -} - -void Elf32_Ehdr::Load(vfsStream& f) -{ - e_magic = Read32(f); - e_class = Read8(f); - e_data = Read8(f); - e_curver = Read8(f); - e_os_abi = Read8(f); - - if(IsLittleEndian()) + namespace handlers { - e_abi_ver = Read64LE(f); - e_type = Read16LE(f); - e_machine = Read16LE(f); - e_version = Read32LE(f); - e_entry = Read32LE(f); - e_phoff = Read32LE(f); - e_shoff = Read32LE(f); - e_flags = Read32LE(f); - e_ehsize = Read16LE(f); - e_phentsize = Read16LE(f); - e_phnum = Read16LE(f); - e_shentsize = Read16LE(f); - e_shnum = Read16LE(f); - e_shstrndx = Read16LE(f); - } - else - { - e_abi_ver = Read64(f); - e_type = Read16(f); - e_machine = Read16(f); - e_version = Read32(f); - e_entry = Read32(f); - e_phoff = Read32(f); - e_shoff = Read32(f); - e_flags = Read32(f); - e_ehsize = Read16(f); - e_phentsize = Read16(f); - e_phnum = Read16(f); - e_shentsize = Read16(f); - e_shnum = Read16(f); - e_shstrndx = Read16(f); - } -} - -void Elf32_Desc::Load(vfsStream& f) -{ - revision = Read32(f); - ls_size = Read32(f); - stack_size = Read32(f); - flags = Read32(f); -} - -void Elf32_Desc::LoadLE(vfsStream& f) -{ - revision = Read32LE(f); - ls_size = Read32LE(f); - stack_size = Read32LE(f); - flags = Read32LE(f); -} - -void 
Elf32_Note::Load(vfsStream& f) -{ - namesz = Read32(f); - descsz = Read32(f); - type = Read32(f); - f.Read(name, 8); - - if (descsz == 32) - { - f.Read(desc_text, descsz); - } - else - { - desc.Load(f); - } -} - -void Elf32_Note::LoadLE(vfsStream& f) -{ - namesz = Read32LE(f); - descsz = Read32LE(f); - type = Read32LE(f); - f.Read(name, 8); - - if (descsz == 32) - { - f.Read(desc_text, descsz); - } - else - { - desc.Load(f); - } -} - -void Elf32_Shdr::Load(vfsStream& f) -{ - sh_name = Read32(f); - sh_type = Read32(f); - sh_flags = Read32(f); - sh_addr = Read32(f); - sh_offset = Read32(f); - sh_size = Read32(f); - sh_link = Read32(f); - sh_info = Read32(f); - sh_addralign = Read32(f); - sh_entsize = Read32(f); -} - -void Elf32_Shdr::LoadLE(vfsStream& f) -{ - sh_name = Read32LE(f); - sh_type = Read32LE(f); - sh_flags = Read32LE(f); - sh_addr = Read32LE(f); - sh_offset = Read32LE(f); - sh_size = Read32LE(f); - sh_link = Read32LE(f); - sh_info = Read32LE(f); - sh_addralign = Read32LE(f); - sh_entsize = Read32LE(f); -} - -void Elf32_Shdr::Show() -{ -#ifdef LOADER_DEBUG - LOG_NOTICE(LOADER, "Name offset: 0x%x", sh_name); - LOG_NOTICE(LOADER, "Type: 0x%d", sh_type); - LOG_NOTICE(LOADER, "Addr: 0x%x", sh_addr); - LOG_NOTICE(LOADER, "Offset: 0x%x", sh_offset); - LOG_NOTICE(LOADER, "Size: 0x%x", sh_size); - LOG_NOTICE(LOADER, "EntSize: %d", sh_entsize); - LOG_NOTICE(LOADER, "Flags: 0x%x", sh_flags); - LOG_NOTICE(LOADER, "Link: 0x%x", sh_link); - LOG_NOTICE(LOADER, "Info: %d", sh_info); - LOG_NOTICE(LOADER, "Address align: 0x%x", sh_addralign); -#endif -} - -void Elf32_Phdr::Load(vfsStream& f) -{ - p_type = Read32(f); - p_offset = Read32(f); - p_vaddr = Read32(f); - p_paddr = Read32(f); - p_filesz = Read32(f); - p_memsz = Read32(f); - p_flags = Read32(f); - p_align = Read32(f); -} - -void Elf32_Phdr::LoadLE(vfsStream& f) -{ - p_type = Read32LE(f); - p_offset = Read32LE(f); - p_vaddr = Read32LE(f); - p_paddr = Read32LE(f); - p_filesz = Read32LE(f); - p_memsz = Read32LE(f); - 
p_flags = Read32LE(f); - p_align = Read32LE(f); -} - -void Elf32_Phdr::Show() -{ -#ifdef LOADER_DEBUG - LOG_NOTICE(LOADER, "Type: %s", Phdr_TypeToString(p_type).c_str()); - LOG_NOTICE(LOADER, "Offset: 0x%08x", p_offset); - LOG_NOTICE(LOADER, "Virtual address: 0x%08x", p_vaddr); - LOG_NOTICE(LOADER, "Physical address: 0x%08x", p_paddr); - LOG_NOTICE(LOADER, "File size: 0x%08x", p_filesz); - LOG_NOTICE(LOADER, "Memory size: 0x%08x", p_memsz); - LOG_NOTICE(LOADER, "Flags: %s", Phdr_FlagsToString(p_flags).c_str()); - LOG_NOTICE(LOADER, "Align: 0x%x", p_align); -#endif -} - -void WriteEhdr(rFile& f, Elf32_Ehdr& ehdr) -{ - Write32(f, ehdr.e_magic); - Write8(f, ehdr.e_class); - Write8(f, ehdr.e_data); - Write8(f, ehdr.e_curver); - Write8(f, ehdr.e_os_abi); - Write64(f, ehdr.e_abi_ver); - Write16(f, ehdr.e_type); - Write16(f, ehdr.e_machine); - Write32(f, ehdr.e_version); - Write32(f, ehdr.e_entry); - Write32(f, ehdr.e_phoff); - Write32(f, ehdr.e_shoff); - Write32(f, ehdr.e_flags); - Write16(f, ehdr.e_ehsize); - Write16(f, ehdr.e_phentsize); - Write16(f, ehdr.e_phnum); - Write16(f, ehdr.e_shentsize); - Write16(f, ehdr.e_shnum); - Write16(f, ehdr.e_shstrndx); -} - -void WritePhdr(rFile& f, Elf32_Phdr& phdr) -{ - Write32(f, phdr.p_type); - Write32(f, phdr.p_offset); - Write32(f, phdr.p_vaddr); - Write32(f, phdr.p_paddr); - Write32(f, phdr.p_filesz); - Write32(f, phdr.p_memsz); - Write32(f, phdr.p_flags); - Write32(f, phdr.p_align); -} - -void WriteShdr(rFile& f, Elf32_Shdr& shdr) -{ - Write32(f, shdr.sh_name); - Write32(f, shdr.sh_type); - Write32(f, shdr.sh_flags); - Write32(f, shdr.sh_addr); - Write32(f, shdr.sh_offset); - Write32(f, shdr.sh_size); - Write32(f, shdr.sh_link); - Write32(f, shdr.sh_info); - Write32(f, shdr.sh_addralign); - Write32(f, shdr.sh_entsize); -} - -ELF32Loader::ELF32Loader(vfsStream& f) - : elf32_f(f) - , LoaderBase() -{ -} - -bool ELF32Loader::LoadInfo() -{ - if(!elf32_f.IsOpened()) return false; - - if(!LoadEhdrInfo()) return false; - 
if(!LoadPhdrInfo()) return false; - if(!LoadShdrInfo()) return false; - - return true; -} - -bool ELF32Loader::LoadData(u64 offset) -{ - if(!elf32_f.IsOpened()) return false; - - if(!LoadEhdrData(offset)) return false; - if(!LoadPhdrData(offset)) return false; - if(!LoadShdrData(offset)) return false; - - return true; -} - -bool ELF32Loader::Close() -{ - return elf32_f.Close(); -} - -bool ELF32Loader::LoadEhdrInfo() -{ - elf32_f.Seek(0); - ehdr.Load(elf32_f); - - if(!ehdr.CheckMagic()) return false; - - if(ehdr.IsLittleEndian()) - LOG_WARNING(LOADER, "ELF32 LE"); - - switch(ehdr.e_machine) - { - case MACHINE_MIPS: - case MACHINE_PPC64: - case MACHINE_SPU: - case MACHINE_ARM: - machine = (Elf_Machine)ehdr.e_machine; - break; - - default: - machine = MACHINE_Unknown; - LOG_ERROR(LOADER, "Unknown elf32 machine: 0x%x", ehdr.e_machine); - return false; - } - - entry = ehdr.GetEntry(); - if(entry == 0) - { - LOG_ERROR(LOADER, "elf32 error: entry is null!"); - return false; - } - - return true; -} - -bool ELF32Loader::LoadPhdrInfo() -{ - if(ehdr.e_phoff == 0 && ehdr.e_phnum) - { - LOG_ERROR(LOADER, "LoadPhdr32 error: Program header offset is null!"); - return false; - } - - elf32_f.Seek(ehdr.e_phoff); - for(uint i=0; i= entry && entry < phdr_arr[i].p_paddr + phdr_arr[i].p_memsz) + error_code res = handler::init(stream); + + if (res != ok) + return res; + + m_stream->Read(&m_ehdr, sizeof(ehdr)); + + if (!m_ehdr.check()) { - entry += phdr_arr[i].p_vaddr; - LOG_WARNING(LOADER, "virtual entry = 0x%x", entry); - break; - } - } - } - - return true; -} - -bool ELF32Loader::LoadShdrInfo() -{ - elf32_f.Seek(ehdr.e_shoff); - for(u32 i=0; i= shdr_arr.size()) - { - LOG_WARNING(LOADER, "LoadShdr32 error: shstrndx too big!"); - return true; - } - - for(u32 i=0; i max_addr) + if (m_ehdr.data_le.e_phnum && (m_ehdr.is_le() ? 
m_ehdr.data_le.e_phentsize : m_ehdr.data_be.e_phentsize) != sizeof(phdr)) { - max_addr = phdr_arr[i].p_vaddr + phdr_arr[i].p_memsz; + return broken_file; } - if(phdr_arr[i].p_vaddr != phdr_arr[i].p_paddr) + if (m_ehdr.data_le.e_shnum && (m_ehdr.is_le() ? m_ehdr.data_le.e_shentsize : m_ehdr.data_be.e_shentsize) != sizeof(shdr)) { - LOG_WARNING - ( - LOADER, - "LoadPhdr32 different load addrs: paddr=0x%8.8x, vaddr=0x%8.8x", - phdr_arr[i].p_paddr, phdr_arr[i].p_vaddr - ); + return broken_file; } - switch(machine) + LOG_ERROR(LOADER, "m_ehdr.e_type = 0x%x", (u16)(m_ehdr.is_le() ? m_ehdr.data_le.e_type : m_ehdr.data_be.e_type)); + + if (m_ehdr.data_le.e_phnum) { - case MACHINE_SPU: break; - case MACHINE_MIPS: Memory.PSP.RAM.AllocFixed(phdr_arr[i].p_vaddr + offset, phdr_arr[i].p_memsz); break; - case MACHINE_ARM: Memory.PSV.RAM.AllocFixed(phdr_arr[i].p_vaddr + offset, phdr_arr[i].p_memsz); break; - - default: - continue; - } - - elf32_f.Seek(phdr_arr[i].p_offset); - elf32_f.Read(vm::get_ptr(phdr_arr[i].p_vaddr + offset), phdr_arr[i].p_filesz); - } - else if(phdr_arr[i].p_type == 0x00000004) - { - elf32_f.Seek(phdr_arr[i].p_offset); - Elf32_Note note; - if(ehdr.IsLittleEndian()) note.LoadLE(elf32_f); - else note.Load(elf32_f); - - if(note.type != 1) - { - LOG_ERROR(LOADER, "ELF32: Bad NOTE type (%d)", note.type); - break; - } - - if(note.namesz != sizeof(note.name)) - { - LOG_ERROR(LOADER, "ELF32: Bad NOTE namesz (%d)", note.namesz); - break; - } - - if(note.descsz != sizeof(note.desc) && note.descsz != 32) - { - LOG_ERROR(LOADER, "ELF32: Bad NOTE descsz (%d)", note.descsz); - break; - } - - //if(note.desc.flags) - //{ - // LOG_ERROR(LOADER, "ELF32: Bad NOTE flags (0x%x)", note.desc.flags); - // break; - //} - - if(note.descsz == sizeof(note.desc)) - { - LOG_WARNING(LOADER, "name = %s", std::string((const char *)note.name, 8).c_str()); - LOG_WARNING(LOADER, "ls_size = %d", note.desc.ls_size); - LOG_WARNING(LOADER, "stack_size = %d", note.desc.stack_size); + 
m_phdrs.resize(m_ehdr.is_le() ? m_ehdr.data_le.e_phnum : m_ehdr.data_be.e_phnum); + m_stream->Seek(handler::get_stream_offset() + (m_ehdr.is_le() ? m_ehdr.data_le.e_phoff : m_ehdr.data_be.e_phoff)); + size_t size = (m_ehdr.is_le() ? m_ehdr.data_le.e_phnum : m_ehdr.data_be.e_phnum) * sizeof(phdr); + if (m_stream->Read(m_phdrs.data(), size) != size) + return broken_file; } else + m_phdrs.clear(); + + if (m_ehdr.data_le.e_shnum) { - LOG_WARNING(LOADER, "desc = '%s'", std::string(note.desc_text, 32).c_str()); + m_phdrs.resize(m_ehdr.is_le() ? m_ehdr.data_le.e_shnum : m_ehdr.data_be.e_shnum); + m_stream->Seek(handler::get_stream_offset() + (m_ehdr.is_le() ? m_ehdr.data_le.e_shoff : m_ehdr.data_be.e_shoff)); + size_t size = (m_ehdr.is_le() ? m_ehdr.data_le.e_phnum : m_ehdr.data_be.e_phnum) * sizeof(phdr); + + if (m_stream->Read(m_shdrs.data(), size) != size) + return broken_file; } - } -#ifdef LOADER_DEBUG - LOG_NOTICE(LOADER, ""); -#endif - } + else + m_shdrs.clear(); - return true; -} - -bool ELF32Loader::LoadShdrData(u64 offset) -{ - u32 fnid_addr = 0; - - for(u32 i=0; i max_addr) + handler::error_code elf32::load() { - max_addr = shdr.sh_addr + shdr.sh_size; - } - - // probably should be in LoadPhdrData() - if (machine == MACHINE_ARM && !strcmp(shdr_name_arr[i].c_str(), ".sceFNID.rodata")) - { - fnid_addr = shdr.sh_addr; - } - else if (machine == MACHINE_ARM && !strcmp(shdr_name_arr[i].c_str(), ".sceFStub.rodata")) - { - list_known_psv_modules(); - - auto fnid = vm::psv::ptr::make(fnid_addr); - auto fstub = vm::psv::ptr::make(shdr.sh_addr); - - for (u32 j = 0; j < shdr.sh_size / 4; j++) + Elf_Machine machine; + switch (machine = (Elf_Machine)(u16)(m_ehdr.is_le() ? 
m_ehdr.data_le.e_machine : m_ehdr.data_be.e_machine)) { - u32 nid = fnid[j]; - u32 addr = fstub[j]; + case MACHINE_MIPS: vm::psp::init(); break; + case MACHINE_ARM: vm::psv::init(); break; + case MACHINE_SPU: vm::ps3::init(); break; - if (auto func = get_psv_func_by_nid(nid)) + default: + return bad_version; + } + + error_code res = load_data(0); + + if (res != ok) + return res; + + switch (machine) + { + case MACHINE_MIPS: break; + case MACHINE_ARM: arm7_thread(m_ehdr.is_le() ? m_ehdr.data_le.e_entry : m_ehdr.data_be.e_entry, "main_thread").args({ Emu.GetPath()/*, "-emu"*/ }).run(); break; + case MACHINE_SPU: spu_thread(m_ehdr.is_le() ? m_ehdr.data_le.e_entry : m_ehdr.data_be.e_entry, "main_thread").args({ Emu.GetPath()/*, "-emu"*/ }).run(); break; + } + + return ok; + } + + handler::error_code elf32::load_data(u32 offset) + { + for (auto &phdr : m_phdrs) + { + u32 memsz = m_ehdr.is_le() ? phdr.data_le.p_memsz : phdr.data_be.p_memsz; + u32 filesz = m_ehdr.is_le() ? phdr.data_le.p_filesz : phdr.data_be.p_filesz; + u32 vaddr = offset + (m_ehdr.is_le() ? phdr.data_le.p_vaddr : phdr.data_be.p_vaddr); + u32 offset = m_ehdr.is_le() ? phdr.data_le.p_offset : phdr.data_be.p_offset; + + switch (m_ehdr.is_le() ? 
phdr.data_le.p_type : phdr.data_be.p_type) { - if (func->module) - func->module->Notice("Imported function %s (nid=0x%08x, addr=0x%x)", func->name, nid, addr); - else - LOG_NOTICE(LOADER, "Imported function %s (nid=0x%08x, addr=0x%x)", func->name, nid, addr); + case 0x00000001: //LOAD + if (phdr.data_le.p_memsz) + { + if (!vm::alloc(vaddr, memsz, vm::main)) + { + LOG_ERROR(LOADER, "%s(): AllocFixed(0x%llx, 0x%x) failed", __FUNCTION__, vaddr, memsz); - // writing Thumb code (temporarily, because it should be ARM) - vm::psv::write16(addr + 0, 0xf870); // HACK (special instruction that calls HLE function) - vm::psv::write16(addr + 2, (u16)get_psv_func_index(func)); - vm::psv::write16(addr + 4, 0x4770); // BX LR - vm::psv::write16(addr + 6, 0); // null - } - else - { - LOG_ERROR(LOADER, "Unimplemented function 0x%08x (addr=0x%x)", nid, addr); + return loading_error; + } - vm::psv::write16(addr + 0, 0xf870); // HACK (special instruction that calls HLE function) - vm::psv::write16(addr + 2, 0x0000); // (zero index) - vm::psv::write16(addr + 4, 0x4770); // BX LR - vm::psv::write16(addr + 6, 0); // null + if (filesz) + { + m_stream->Seek(handler::get_stream_offset() + offset); + m_stream->Read(vm::get_ptr(vaddr), filesz); + } + } + break; } } - } - else if (machine == MACHINE_ARM && !strcmp(shdr_name_arr[i].c_str(), ".sceRefs.rodata")) - { - auto code = vm::psv::ptr::make(shdr.sh_addr); - u32 data = 0; - for (; code.addr() < shdr.sh_addr + shdr.sh_size; code++) - { - switch (*code) - { - case 0x000000ff: - { - // save address for future use - data = *++code; - break; - } - case 0x0000002f: - { - // movw r12,# instruction will be replaced - const u32 addr = *++code; - vm::psv::write16(addr + 0, 0xf240 | (data & 0x800) >> 1 | (data & 0xf000) >> 12); // MOVW - vm::psv::write16(addr + 2, 0x0c00 | (data & 0x700) << 4 | (data & 0xff)); - break; - } - case 0x00000030: - { - // movt r12,# instruction will be replaced - const u32 addr = *++code; - vm::psv::write16(addr + 0, 0xf2c0 
| (data & 0x8000000) >> 17 | (data & 0xf0000000) >> 28); // MOVT - vm::psv::write16(addr + 2, 0x0c00 | (data & 0x7000000) >> 12 | (data & 0xff0000) >> 16); - break; - } - case 0x00000000: - { - // probably, no operation - break; - } - default: LOG_NOTICE(LOADER, "sceRefs: unknown code found (0x%08x)", *code); - } - } + return ok; } } - - //TODO - return true; -} +} \ No newline at end of file diff --git a/rpcs3/Loader/ELF32.h b/rpcs3/Loader/ELF32.h index 1d573824d3..dc37dd5f0b 100644 --- a/rpcs3/Loader/ELF32.h +++ b/rpcs3/Loader/ELF32.h @@ -4,135 +4,137 @@ struct vfsStream; class rFile; -struct Elf32_Ehdr +namespace loader { - u32 e_magic; - u8 e_class; - u8 e_data; - u8 e_curver; - u8 e_os_abi; - u64 e_abi_ver; - u16 e_type; - u16 e_machine; - u32 e_version; - u32 e_entry; - u32 e_phoff; - u32 e_shoff; - u32 e_flags; - u16 e_ehsize; - u16 e_phentsize; - u16 e_phnum; - u16 e_shentsize; - u16 e_shnum; - u16 e_shstrndx; - - void Show(); - - bool IsLittleEndian() const + namespace handlers { - return e_data == 1; + class elf32 : public handler + { + public: + struct ehdr + { + u32 e_magic; + u8 e_class; + u8 e_data; + u8 e_curver; + u8 e_os_abi; + + union + { + struct + { + u64 e_abi_ver; + u16 e_type; + u16 e_machine; + u32 e_version; + u16 e_entry; + u32 e_phoff; + u32 e_shoff; + u32 e_flags; + u16 e_ehsize; + u16 e_phentsize; + u16 e_phnum; + u16 e_shentsize; + u16 e_shnum; + u16 e_shstrndx; + } data_le; + + struct + { + be_t e_abi_ver; + be_t e_type; + be_t e_machine; + be_t e_version; + be_t e_entry; + be_t e_phoff; + be_t e_shoff; + be_t e_flags; + be_t e_ehsize; + be_t e_phentsize; + be_t e_phnum; + be_t e_shentsize; + be_t e_shnum; + be_t e_shstrndx; + } data_be; + }; + + bool is_le() const { return e_data == 1; } + bool check() const { return e_magic == 0x7F454C46; } + }; + + struct shdr + { + union + { + struct + { + u32 sh_name; + u32 sh_type; + u32 sh_flags; + u32 sh_addr; + u32 sh_offset; + u32 sh_size; + u32 sh_link; + u32 sh_info; + u32 sh_addralign; + 
u32 sh_entsize; + } data_le; + + struct + { + be_t sh_name; + be_t sh_type; + be_t sh_flags; + be_t sh_addr; + be_t sh_offset; + be_t sh_size; + be_t sh_link; + be_t sh_info; + be_t sh_addralign; + be_t sh_entsize; + } data_be; + }; + }; + + struct phdr + { + union + { + struct + { + u32 p_type; + u32 p_offset; + u32 p_vaddr; + u32 p_paddr; + u32 p_filesz; + u32 p_memsz; + u32 p_flags; + u32 p_align; + } data_le; + + struct + { + be_t p_type; + be_t p_offset; + be_t p_vaddr; + be_t p_paddr; + be_t p_filesz; + be_t p_memsz; + be_t p_flags; + be_t p_align; + } data_be; + }; + }; + + ehdr m_ehdr; + std::vector m_phdrs; + std::vector m_shdrs; + + error_code init(vfsStream& stream) override; + error_code load() override; + error_code load_data(u32 offset); + + virtual ~elf32() = default; + }; } - - void Load(vfsStream& f); - - bool CheckMagic() const { return e_magic == 0x7F454C46; } - u32 GetEntry() const { return e_entry; } -}; - -struct Elf32_Desc -{ - u32 revision; - u32 ls_size; - u32 stack_size; - u32 flags; - - void Load(vfsStream& f); - - void LoadLE(vfsStream& f); -}; - -struct Elf32_Note -{ - u32 namesz; - u32 descsz; - u32 type; - u8 name[8]; - union - { - Elf32_Desc desc; - char desc_text[32]; - }; - - void Load(vfsStream& f); - - void LoadLE(vfsStream& f); -}; - -struct Elf32_Shdr -{ - u32 sh_name; - u32 sh_type; - u32 sh_flags; - u32 sh_addr; - u32 sh_offset; - u32 sh_size; - u32 sh_link; - u32 sh_info; - u32 sh_addralign; - u32 sh_entsize; - - void Load(vfsStream& f); - - void LoadLE(vfsStream& f); - - void Show(); -}; - -struct Elf32_Phdr -{ - u32 p_type; - u32 p_offset; - u32 p_vaddr; - u32 p_paddr; - u32 p_filesz; - u32 p_memsz; - u32 p_flags; - u32 p_align; - - void Load(vfsStream& f); - - void LoadLE(vfsStream& f); - - void Show(); -}; - -class ELF32Loader : public LoaderBase -{ - vfsStream& elf32_f; - -public: - Elf32_Ehdr ehdr; - std::vector shdr_name_arr; - std::vector shdr_arr; - std::vector phdr_arr; - - ELF32Loader(vfsStream& f); - 
~ELF32Loader() {Close();} - - virtual bool LoadInfo(); - virtual bool LoadData(u64 offset); - virtual bool Close(); - -private: - bool LoadEhdrInfo(); - bool LoadPhdrInfo(); - bool LoadShdrInfo(); - - bool LoadEhdrData(u64 offset); - bool LoadPhdrData(u64 offset); - bool LoadShdrData(u64 offset); -}; - -void WriteEhdr(rFile& f, Elf32_Ehdr& ehdr); -void WritePhdr(rFile& f, Elf32_Phdr& phdr); -void WriteShdr(rFile& f, Elf32_Shdr& shdr); +} diff --git a/rpcs3/Loader/ELF64.cpp b/rpcs3/Loader/ELF64.cpp index 119a4913b9..40e2b80a4d 100644 --- a/rpcs3/Loader/ELF64.cpp +++ b/rpcs3/Loader/ELF64.cpp @@ -2,555 +2,489 @@ #include "Utilities/Log.h" #include "Utilities/rFile.h" #include "Emu/FS/vfsStream.h" +#include "Emu/FS/vfsFile.h" #include "Emu/Memory/Memory.h" #include "Emu/System.h" #include "Emu/SysCalls/SysCalls.h" #include "Emu/SysCalls/Static.h" -#include "Emu/Cell/PPUInstrTable.h" #include "Emu/SysCalls/ModuleManager.h" +#include "Emu/SysCalls/lv2/sys_prx.h" +#include "Emu/Cell/PPUInstrTable.h" +#include "Emu/CPU/CPUThreadManager.h" #include "ELF64.h" using namespace PPU_instr; -void Elf64_Ehdr::Load(vfsStream& f) +namespace loader { - e_magic = Read32(f); - e_class = Read8(f); - e_data = Read8(f); - e_curver = Read8(f); - e_os_abi = Read8(f); - e_abi_ver = Read64(f); - e_type = Read16(f); - e_machine = Read16(f); - e_version = Read32(f); - e_entry = Read64(f); - e_phoff = Read64(f); - e_shoff = Read64(f); - e_flags = Read32(f); - e_ehsize = Read16(f); - e_phentsize = Read16(f); - e_phnum = Read16(f); - e_shentsize = Read16(f); - e_shnum = Read16(f); - e_shstrndx = Read16(f); -} - -void Elf64_Ehdr::Show() -{ -#ifdef LOADER_DEBUG - LOG_NOTICE(LOADER, "Magic: %08x", e_magic); - LOG_NOTICE(LOADER, "Class: %s", "ELF64"); - LOG_NOTICE(LOADER, "Data: %s", Ehdr_DataToString(e_data).c_str()); - LOG_NOTICE(LOADER, "Current Version: %d", e_curver); - LOG_NOTICE(LOADER, "OS/ABI: %s", Ehdr_OS_ABIToString(e_os_abi).c_str()); - LOG_NOTICE(LOADER, "ABI version: %lld", e_abi_ver); - 
LOG_NOTICE(LOADER, "Type: %s", Ehdr_TypeToString(e_type).c_str()); - LOG_NOTICE(LOADER, "Machine: %s", Ehdr_MachineToString(e_machine).c_str()); - LOG_NOTICE(LOADER, "Version: %d", e_version); - LOG_NOTICE(LOADER, "Entry point address: 0x%08llx", e_entry); - LOG_NOTICE(LOADER, "Program headers offset: 0x%08llx", e_phoff); - LOG_NOTICE(LOADER, "Section headers offset: 0x%08llx", e_shoff); - LOG_NOTICE(LOADER, "Flags: 0x%x", e_flags); - LOG_NOTICE(LOADER, "Size of this header: %d", e_ehsize); - LOG_NOTICE(LOADER, "Size of program headers: %d", e_phentsize); - LOG_NOTICE(LOADER, "Number of program headers: %d", e_phnum); - LOG_NOTICE(LOADER, "Size of section headers: %d", e_shentsize); - LOG_NOTICE(LOADER, "Number of section headers: %d", e_shnum); - LOG_NOTICE(LOADER, "Section header string table index: %d", e_shstrndx); -#endif -} - -void Elf64_Shdr::Load(vfsStream& f) -{ - sh_name = Read32(f); - sh_type = Read32(f); - sh_flags = Read64(f); - sh_addr = Read64(f); - sh_offset = Read64(f); - sh_size = Read64(f); - sh_link = Read32(f); - sh_info = Read32(f); - sh_addralign = Read64(f); - sh_entsize = Read64(f); -} - -void Elf64_Shdr::Show() -{ -#ifdef LOADER_DEBUG - LOG_NOTICE(LOADER, "Name offset: 0x%x", sh_name); - LOG_NOTICE(LOADER, "Type: %d", sh_type); - LOG_NOTICE(LOADER, "Addr: 0x%llx", sh_addr); - LOG_NOTICE(LOADER, "Offset: 0x%llx", sh_offset); - LOG_NOTICE(LOADER, "Size: 0x%llx", sh_size); - LOG_NOTICE(LOADER, "EntSize: %lld", sh_entsize); - LOG_NOTICE(LOADER, "Flags: 0x%llx", sh_flags); - LOG_NOTICE(LOADER, "Link: 0x%x", sh_link); - LOG_NOTICE(LOADER, "Info: 0x%x", sh_info); - LOG_NOTICE(LOADER, "Address align: 0x%llx", sh_addralign); -#endif -} - -void Elf64_Phdr::Load(vfsStream& f) -{ - p_type = Read32(f); - p_flags = Read32(f); - p_offset = Read64(f); - p_vaddr = Read64(f); - p_paddr = Read64(f); - p_filesz = Read64(f); - p_memsz = Read64(f); - p_align = Read64(f); -} - -void Elf64_Phdr::Show() -{ -#ifdef LOADER_DEBUG - LOG_NOTICE(LOADER, "Type: %s", 
Phdr_TypeToString(p_type).c_str()); - LOG_NOTICE(LOADER, "Offset: 0x%08llx", p_offset); - LOG_NOTICE(LOADER, "Virtual address: 0x%08llx", p_vaddr); - LOG_NOTICE(LOADER, "Physical address: 0x%08llx", p_paddr); - LOG_NOTICE(LOADER, "File size: 0x%08llx", p_filesz); - LOG_NOTICE(LOADER, "Memory size: 0x%08llx", p_memsz); - LOG_NOTICE(LOADER, "Flags: %s", Phdr_FlagsToString(p_flags).c_str()); - LOG_NOTICE(LOADER, "Align: 0x%llx", p_align); -#endif -} - -void WriteEhdr(rFile& f, Elf64_Ehdr& ehdr) -{ - Write32(f, ehdr.e_magic); - Write8(f, ehdr.e_class); - Write8(f, ehdr.e_data); - Write8(f, ehdr.e_curver); - Write8(f, ehdr.e_os_abi); - Write64(f, ehdr.e_abi_ver); - Write16(f, ehdr.e_type); - Write16(f, ehdr.e_machine); - Write32(f, ehdr.e_version); - Write64(f, ehdr.e_entry); - Write64(f, ehdr.e_phoff); - Write64(f, ehdr.e_shoff); - Write32(f, ehdr.e_flags); - Write16(f, ehdr.e_ehsize); - Write16(f, ehdr.e_phentsize); - Write16(f, ehdr.e_phnum); - Write16(f, ehdr.e_shentsize); - Write16(f, ehdr.e_shnum); - Write16(f, ehdr.e_shstrndx); -} - -void WritePhdr(rFile& f, Elf64_Phdr& phdr) -{ - Write32(f, phdr.p_type); - Write32(f, phdr.p_flags); - Write64(f, phdr.p_offset); - Write64(f, phdr.p_vaddr); - Write64(f, phdr.p_paddr); - Write64(f, phdr.p_filesz); - Write64(f, phdr.p_memsz); - Write64(f, phdr.p_align); -} - -void WriteShdr(rFile& f, Elf64_Shdr& shdr) -{ - Write32(f, shdr.sh_name); - Write32(f, shdr.sh_type); - Write64(f, shdr.sh_flags); - Write64(f, shdr.sh_addr); - Write64(f, shdr.sh_offset); - Write64(f, shdr.sh_size); - Write32(f, shdr.sh_link); - Write32(f, shdr.sh_info); - Write64(f, shdr.sh_addralign); - Write64(f, shdr.sh_entsize); -} - -ELF64Loader::ELF64Loader(vfsStream& f) - : elf64_f(f) - , LoaderBase() -{ - int a = 0; -} - -bool ELF64Loader::LoadInfo() -{ - if(!elf64_f.IsOpened()) return false; - - if(!LoadEhdrInfo()) return false; - if(!LoadPhdrInfo()) return false; - if(!LoadShdrInfo()) return false; - - return true; -} - -bool 
ELF64Loader::LoadData(u64 offset) -{ - if(!elf64_f.IsOpened()) return false; - - if(!LoadEhdrData(offset)) return false; - if(!LoadPhdrData(offset)) return false; - if(!LoadShdrData(offset)) return false; - - return true; -} - -bool ELF64Loader::Close() -{ - return elf64_f.Close(); -} - -bool ELF64Loader::LoadEhdrInfo(s64 offset) -{ - elf64_f.Seek(offset < 0 ? 0 : offset); - ehdr.Load(elf64_f); - - if(!ehdr.CheckMagic()) return false; - - if(ehdr.e_phentsize != sizeof(Elf64_Phdr)) + namespace handlers { - LOG_ERROR(LOADER, "elf64 error: e_phentsize[0x%x] != sizeof(Elf64_Phdr)[0x%x]", ehdr.e_phentsize, sizeof(Elf64_Phdr)); - return false; - } - - if(ehdr.e_shentsize != sizeof(Elf64_Shdr)) - { - LOG_ERROR(LOADER, "elf64 error: e_shentsize[0x%x] != sizeof(Elf64_Shdr)[0x%x]", ehdr.e_shentsize, sizeof(Elf64_Shdr)); - return false; - } - - switch(ehdr.e_machine) - { - case MACHINE_PPC64: - case MACHINE_SPU: - machine = (Elf_Machine)ehdr.e_machine; - break; - - default: - machine = MACHINE_Unknown; - LOG_ERROR(LOADER, "Unknown elf64 type: 0x%x", ehdr.e_machine); - return false; - } - - entry = (u32)ehdr.GetEntry(); - if(entry == 0) - { - LOG_ERROR(LOADER, "elf64 error: entry is null!"); - return false; - } - - return true; -} - -bool ELF64Loader::LoadPhdrInfo(s64 offset) -{ - phdr_arr.clear(); - - if(ehdr.e_phoff == 0 && ehdr.e_phnum) - { - LOG_ERROR(LOADER, "LoadPhdr64 error: Program header offset is null!"); - return false; - } - - elf64_f.Seek(offset < 0 ? 
ehdr.e_phoff : offset); - - for(u32 i=0; i= shdr_arr.size()) - { - LOG_WARNING(LOADER, "LoadShdr64 error: shstrndx too big!"); - return true; - } - - for(u32 i=0; iRead(&m_ehdr, sizeof(ehdr)); -bool ELF64Loader::LoadPhdrData(u64 offset) -{ - for(auto& phdr: phdr_arr) - { - phdr.Show(); - - if (phdr.p_vaddr < min_addr) - { - min_addr = (u32)phdr.p_vaddr; - } - - if (phdr.p_vaddr + phdr.p_memsz > max_addr) - { - max_addr = (u32)(phdr.p_vaddr + phdr.p_memsz); - } - - if (phdr.p_vaddr != phdr.p_paddr) - { - LOG_WARNING(LOADER, "ElfProgram different load addrs: paddr=0x%8.8x, vaddr=0x%8.8x", - phdr.p_paddr, phdr.p_vaddr); - } - - if(!Memory.MainMem.IsInMyRange(offset + phdr.p_vaddr, (u32)phdr.p_memsz)) - { -#ifdef LOADER_DEBUG - LOG_WARNING(LOADER, "Skipping..."); - LOG_WARNING(LOADER, ""); -#endif - continue; - } - - switch(phdr.p_type) - { - case 0x00000001: //LOAD - if(phdr.p_memsz) - { - if (!Memory.MainMem.AllocFixed(offset + phdr.p_vaddr, (u32)phdr.p_memsz)) - { - LOG_ERROR(LOADER, "%s(): AllocFixed(0x%llx, 0x%x) failed", __FUNCTION__, offset + phdr.p_vaddr, (u32)phdr.p_memsz); - } - else if (phdr.p_filesz) - { - elf64_f.Seek(phdr.p_offset); - elf64_f.Read(vm::get_ptr(offset + phdr.p_vaddr), phdr.p_filesz); - Emu.GetSFuncManager().StaticAnalyse(vm::get_ptr(offset + phdr.p_vaddr), (u32)phdr.p_filesz, (u32)phdr.p_vaddr); - } - } - break; - - case 0x00000007: //TLS - Emu.SetTLSData(offset + phdr.p_vaddr, phdr.p_filesz, phdr.p_memsz); - break; - - case 0x60000001: //LOOS+1 + if (!m_ehdr.check()) { - if(!phdr.p_filesz) + return bad_file; + } + + if (m_ehdr.e_phnum && m_ehdr.e_phentsize != sizeof(phdr)) + { + return broken_file; + } + + if (m_ehdr.e_shnum && m_ehdr.e_shentsize != sizeof(shdr)) + { + return broken_file; + } + + LOG_ERROR(LOADER, "m_ehdr.e_type = 0x%x", m_ehdr.e_type.ToLE()); + + if (m_ehdr.e_machine != MACHINE_PPC64 && m_ehdr.e_machine != MACHINE_SPU) + { + LOG_ERROR(LOADER, "Unknown elf64 machine type: 0x%x", m_ehdr.e_machine.ToLE()); + return 
bad_version; + } + + if (m_ehdr.e_phnum) + { + m_phdrs.resize(m_ehdr.e_phnum); + m_stream->Seek(handler::get_stream_offset() + m_ehdr.e_phoff); + if (m_stream->Read(m_phdrs.data(), m_ehdr.e_phnum * sizeof(phdr)) != m_ehdr.e_phnum * sizeof(phdr)) + return broken_file; + } + else + m_phdrs.clear(); + + if (m_ehdr.e_shnum) + { + m_shdrs.resize(m_ehdr.e_shnum); + m_stream->Seek(handler::get_stream_offset() + m_ehdr.e_shoff); + if (m_stream->Read(m_shdrs.data(), m_ehdr.e_shnum * sizeof(shdr)) != m_ehdr.e_shnum * sizeof(shdr)) + return broken_file; + } + else + m_shdrs.clear(); + + if (is_sprx()) + { + LOG_NOTICE(LOADER, "SPRX loading..."); + + m_stream->Seek(handler::get_stream_offset() + m_phdrs[0].p_paddr.addr()); + m_stream->Read(&m_sprx_module_info, sizeof(sprx_module_info)); + + //m_stream->Seek(handler::get_stream_offset() + m_phdrs[1].p_vaddr.addr()); + //m_stream->Read(&m_sprx_function_info, sizeof(sprx_function_info)); + } + else + { + m_sprx_import_info.clear(); + m_sprx_export_info.clear(); + } + + return ok; + } + + handler::error_code elf64::load_sprx(sprx_info& info) + { + for (auto &phdr : m_phdrs) + { + switch (phdr.p_type) + { + case 0x1: //load + if (phdr.p_memsz) + { + sprx_segment_info segment; + segment.size = phdr.p_memsz; + segment.size_file = phdr.p_filesz; + + segment.begin.set(vm::alloc(segment.size, vm::sprx)); + + if (!segment.begin) + { + LOG_ERROR(LOADER, "%s() sprx: AllocFixed(0x%llx, 0x%x) failed", __FUNCTION__, phdr.p_vaddr.addr(), (u32)phdr.p_memsz); + + return loading_error; + } + + segment.initial_addr.set(phdr.p_vaddr.addr()); + LOG_ERROR(LOADER, "segment addr=0x%x, initial addr = 0x%x", segment.begin.addr(), segment.initial_addr.addr()); + + if (phdr.p_filesz) + { + m_stream->Seek(handler::get_stream_offset() + phdr.p_offset); + m_stream->Read(segment.begin.get_ptr(), phdr.p_filesz); + } + + if (phdr.p_paddr) + { + sys_prx_module_info_t module_info; + m_stream->Seek(handler::get_stream_offset() + phdr.p_paddr.addr()); + 
m_stream->Read(&module_info, sizeof(module_info)); + LOG_ERROR(LOADER, "%s (%x):", module_info.name, (u32)module_info.toc); + + int import_count = (module_info.imports_end - module_info.imports_start) / sizeof(sys_prx_library_info_t); + + if (import_count) + { + LOG_ERROR(LOADER, "**** Lib '%s'has %d imports!", module_info.name, import_count); + break; + } + + sys_prx_library_info_t lib; + for (u32 e = module_info.exports_start.addr(); + e < module_info.exports_end.addr(); + e += lib.size ? lib.size : sizeof(sys_prx_library_info_t)) + { + m_stream->Seek(handler::get_stream_offset() + phdr.p_offset + e); + m_stream->Read(&lib, sizeof(lib)); + + std::string modulename; + if (lib.name_addr) + { + char name[27]; + m_stream->Seek(handler::get_stream_offset() + phdr.p_offset + lib.name_addr); + modulename = std::string(name, m_stream->Read(name, sizeof(name))); + LOG_ERROR(LOADER, "**** %s", name); + } + + //ModuleManager& manager = Emu.GetModuleManager(); + //Module* module = manager.GetModuleByName(modulename); + + LOG_ERROR(LOADER, "**** 0x%x - 0x%x - 0x%x", (u32)lib.unk4, (u32)lib.unk5, (u32)lib.unk6); + + for (u16 i = 0, end = lib.num_func; i < end; ++i) + { + be_t fnid, fstub; + m_stream->Seek(handler::get_stream_offset() + phdr.p_offset + lib.fnid_addr + i * sizeof(fnid)); + m_stream->Read(&fnid, sizeof(fnid)); + + m_stream->Seek(handler::get_stream_offset() + phdr.p_offset + lib.fstub_addr + i * sizeof(fstub)); + m_stream->Read(&fstub, sizeof(fstub)); + + info.exports[fnid] = fstub; + + //LOG_NOTICE(LOADER, "Exported function '%s' in '%s' module (LLE)", SysCalls::GetHLEFuncName(fnid).c_str(), module_name.c_str()); + LOG_ERROR(LOADER, "**** %s: [%s] -> 0x%x", modulename.c_str(), SysCalls::GetHLEFuncName(fnid).c_str(), (u32)fstub); + } + } + + for (u32 i = module_info.imports_start; + i < module_info.imports_end; + i += lib.size ? 
lib.size : sizeof(sys_prx_library_info_t)) + { + m_stream->Seek(handler::get_stream_offset() + phdr.p_offset + i); + m_stream->Read(&lib, sizeof(lib)); + } + } + + info.segments.push_back(segment); + } + break; - const sys_process_param& proc_param = vm::get_ref(offset + phdr.p_vaddr); + case 0x700000a4: //relocation + m_stream->Seek(handler::get_stream_offset() + phdr.p_offset); - if (proc_param.size < sizeof(sys_process_param)) - { - LOG_WARNING(LOADER, "Bad proc param size! [0x%x : 0x%x]", proc_param.size, sizeof(sys_process_param)); - } - if (proc_param.magic != 0x13bcc5f6) - { - LOG_ERROR(LOADER, "Bad magic! [0x%x]", proc_param.magic); - } - else - { -#ifdef LOADER_DEBUG - sys_process_param_info& info = Emu.GetInfo().GetProcParam(); - LOG_NOTICE(LOADER, "*** sdk version: 0x%x", info.sdk_version.ToLE()); - LOG_NOTICE(LOADER, "*** primary prio: %d", info.primary_prio.ToLE()); - LOG_NOTICE(LOADER, "*** primary stacksize: 0x%x", info.primary_stacksize.ToLE()); - LOG_NOTICE(LOADER, "*** malloc pagesize: 0x%x", info.malloc_pagesize.ToLE()); - LOG_NOTICE(LOADER, "*** ppc seg: 0x%x", info.ppc_seg.ToLE()); - //LOG_NOTICE(LOADER, "*** crash dump param addr: 0x%x", info.crash_dump_param_addr.ToLE()); -#endif + for (uint i = 0; i < phdr.p_filesz; i += sizeof(sys_prx_relocation_info_t)) + { + sys_prx_relocation_info_t rel; + m_stream->Read(&rel, sizeof(rel)); + + u32 ADDR = info.segments[rel.index_addr].begin.addr() + rel.offset; + + switch ((u32)rel.type) + { + case 1: + LOG_ERROR(LOADER, "**** RELOCATION(1): 0x%x <- 0x%x", ADDR, (u32)(info.segments[rel.index_value].begin.addr() + rel.ptr.addr())); + *vm::ptr::make(ADDR) = info.segments[rel.index_value].begin.addr() + rel.ptr.addr(); + break; + + case 4: + LOG_ERROR(LOADER, "**** RELOCATION(4): 0x%x <- 0x%x", ADDR, (u16)(rel.ptr.addr())); + *vm::ptr::make(ADDR) = (u16)(u64)rel.ptr.addr(); + break; + + case 5: + LOG_ERROR(LOADER, "**** RELOCATION(5): 0x%x <- 0x%x", ADDR, (u16)(info.segments[rel.index_value].begin.addr() >> 
16)); + *vm::ptr::make(ADDR) = info.segments[rel.index_value].begin.addr() >> 16; + break; + + case 6: + LOG_ERROR(LOADER, "**** RELOCATION(6): 0x%x <- 0x%x", ADDR, (u16)(info.segments[1].begin.addr() >> 16)); + *vm::ptr::make(ADDR) = info.segments[1].begin.addr() >> 16; + break; + + default: + LOG_ERROR(LOADER, "unknown prx relocation type (0x%x)", (u32)rel.type); + return bad_relocation_type; + } + } + + break; } } - break; - case 0x60000002: //LOOS+2 + for (auto &e : info.exports) { - if(!phdr.p_filesz) - break; + u32 stub = e.second; - const sys_proc_prx_param& proc_prx_param = vm::get_ref(offset + phdr.p_vaddr); - - -#ifdef LOADER_DEBUG - LOG_NOTICE(LOADER, "*** size: 0x%x", proc_prx_param.size.ToLE()); - LOG_NOTICE(LOADER, "*** magic: 0x%x", proc_prx_param.magic.ToLE()); - LOG_NOTICE(LOADER, "*** version: 0x%x", proc_prx_param.version.ToLE()); - LOG_NOTICE(LOADER, "*** libentstart: 0x%x", proc_prx_param.libentstart.ToLE()); - LOG_NOTICE(LOADER, "*** libentend: 0x%x", proc_prx_param.libentend.ToLE()); - LOG_NOTICE(LOADER, "*** libstubstart: 0x%x", proc_prx_param.libstubstart.ToLE()); - LOG_NOTICE(LOADER, "*** libstubend: 0x%x", proc_prx_param.libstubend.ToLE()); - LOG_NOTICE(LOADER, "*** ver: 0x%x", proc_prx_param.ver.ToLE()); -#endif - - if (proc_prx_param.magic != 0x1b434cec) + for (auto &s : info.segments) { - LOG_ERROR(LOADER, "Bad magic! 
(0x%x)", proc_prx_param.magic.ToLE()); - break; + if (stub >= s.initial_addr.addr() && stub < s.initial_addr.addr() + s.size_file) + { + stub += s.begin.addr() - s.initial_addr.addr(); + break; + } } - for (u32 s = proc_prx_param.libstubstart; s < proc_prx_param.libstubend; s += sizeof(Elf64_StubHeader)) - { - const Elf64_StubHeader& stub = vm::get_ref(offset + s); - - const std::string module_name = vm::get_ptr(stub.s_modulename); - Module* module = Emu.GetModuleManager().GetModuleByName(module_name); - if (module) - { - //module->SetLoaded(); - } - else - { - LOG_WARNING(LOADER, "Unknown module '%s'", module_name.c_str()); - } - -#ifdef LOADER_DEBUG - LOG_NOTICE(LOADER, ""); - LOG_NOTICE(LOADER, "*** size: 0x%x", stub.s_size); - LOG_NOTICE(LOADER, "*** version: 0x%x", stub.s_version.ToLE()); - LOG_NOTICE(LOADER, "*** unk0: 0x%x", stub.s_unk0); - LOG_NOTICE(LOADER, "*** unk1: 0x%x", stub.s_unk1.ToLE()); - LOG_NOTICE(LOADER, "*** imports: %d", stub.s_imports.ToLE()); - LOG_NOTICE(LOADER, "*** module name: %s [0x%x]", module_name.c_str(), stub.s_modulename.ToLE()); - LOG_NOTICE(LOADER, "*** nid: 0x%016llx [0x%x]", vm::read64(stub.s_nid), stub.s_nid.ToLE()); - LOG_NOTICE(LOADER, "*** text: 0x%x", stub.s_text.ToLE()); -#endif - static const u32 section = 4 * 3; - u64 tbl = Memory.MainMem.AllocAlign(stub.s_imports * 4 * 2); - u64 dst = Memory.MainMem.AllocAlign(stub.s_imports * section); - - for (u32 i = 0; i < stub.s_imports; ++i) - { - const u32 nid = vm::read32(stub.s_nid + i * 4); - const u32 text = vm::read32(stub.s_text + i * 4); - - if (!module || !module->Load(nid)) - { - LOG_WARNING(LOADER, "Unimplemented function '%s' in '%s' module", SysCalls::GetHLEFuncName(nid).c_str(), module_name.c_str()); - } - else //if (Ini.HLELogging.GetValue()) - { - LOG_NOTICE(LOADER, "Imported function '%s' in '%s' module", SysCalls::GetHLEFuncName(nid).c_str(), module_name.c_str()); - } -#ifdef LOADER_DEBUG - LOG_NOTICE(LOADER, "import %d:", i+1); - LOG_NOTICE(LOADER, "*** nid: 
0x%x (0x%x)", nid, stub.s_nid + i*4); - LOG_NOTICE(LOADER, "*** text: 0x%x (0x%x)", text, stub.s_text + i*4); -#endif - vm::write32(stub.s_text + i*4, (u32)tbl + i*8); - - auto out_tbl = vm::ptr::make((u32)tbl + i * 8); - out_tbl[0] = (u32)dst + i*section; - out_tbl[1] = Emu.GetModuleManager().GetFuncNumById(nid); - - auto out_dst = vm::ptr::make((u32)dst + i * section); - out_dst[0] = OR(11, 2, 2, 0); - out_dst[1] = SC(2); - out_dst[2] = BLR(); - } - } -#ifdef LOADER_DEBUG - LOG_NOTICE(LOADER, ""); -#endif + e.second = stub; } - break; - } -#ifdef LOADER_DEBUG - LOG_NOTICE(LOADER, ""); -#endif - } - return true; -} - -bool ELF64Loader::LoadShdrData(u64 offset) -{ - u64 max_addr = 0; - - for(uint i=0; i max_addr) max_addr = shdr.sh_addr + shdr.sh_size; - - if((shdr.sh_flags & SHF_ALLOC) != SHF_ALLOC) continue; - - const u64 addr = shdr.sh_addr; - const u64 size = shdr.sh_size; - - if(size == 0 || !Memory.IsGoodAddr(offset + addr, (u32)size)) continue; - - if(shdr.sh_addr && shdr.sh_addr < min_addr) + handler::error_code elf64::load() { - min_addr = (u32)shdr.sh_addr; + if (is_sprx()) + { + sprx_info info; + return load_sprx(info); + } + + Emu.m_sdk_version = -1; + + //store elf to memory + vm::ps3::init(); + + std::vector start_funcs; + std::vector stop_funcs; + + //load modules + static const char* lle_modules[] = { "lv2", "sre", "l10n", "gcm_sys", "fs" }; + //TODO: for (auto &module : lle_modules) + char* module; if (0) + { + elf64 sprx_handler; + vfsFile fsprx(std::string("/dev_flash/sys/external/lib") + module + ".sprx"); + + if (fsprx.IsOpened()) + { + sprx_handler.init(fsprx); + + if (sprx_handler.is_sprx()) + { + sprx_info info; + sprx_handler.load_sprx(info); + + std::unordered_map::iterator f; + + if ((f = info.exports.find(0xbc9a0086)) != info.exports.end()) + start_funcs.push_back(f->second); + + if ((f = info.exports.find(0xab779874)) != info.exports.end()) + stop_funcs.push_back(f->second); + + for (auto &e : info.exports) + { + if (e.first != 
0xbc9a0086 && e.first != 0xab779874) + Emu.GetModuleManager().register_function(e.first, e.second); + } + } + } + } + + error_code res = load_data(0); + if (res != ok) + return res; + + //initialize process + auto rsx_callback_data = vm::ptr::make(Memory.MainMem.AllocAlign(4 * 4)); + *rsx_callback_data++ = (rsx_callback_data + 1).addr(); + Emu.SetRSXCallback(rsx_callback_data.addr()); + + rsx_callback_data[0] = ADDI(r11, 0, 0x3ff); + rsx_callback_data[1] = SC(2); + rsx_callback_data[2] = BLR(); + + auto ppu_thr_exit_data = vm::ptr::make(Memory.MainMem.AllocAlign(3 * 4)); + ppu_thr_exit_data[0] = ADDI(r11, 0, 41); + ppu_thr_exit_data[1] = SC(2); + ppu_thr_exit_data[2] = BLR(); + Emu.SetPPUThreadExit(ppu_thr_exit_data.addr()); + + auto ppu_thr_stop_data = vm::ptr::make(Memory.MainMem.AllocAlign(2 * 4)); + ppu_thr_stop_data[0] = SC(4); + ppu_thr_stop_data[1] = BLR(); + Emu.SetPPUThreadStop(ppu_thr_stop_data.addr()); + + //vm::write64(Memory.PRXMem.AllocAlign(0x10000), 0xDEADBEEFABADCAFE); + /* + //TODO + static const int branch_size = 6 * 4; + auto make_branch = [](vm::ptr& ptr, u32 addr) + { + u32 stub = vm::read32(addr); + u32 rtoc = vm::read32(addr + 4); + + *ptr++ = implicts::LI(r0, stub >> 16); + *ptr++ = ORIS(r0, r0, stub & 0xffff); + *ptr++ = implicts::LI(r2, rtoc >> 16); + *ptr++ = ORIS(r2, r2, rtoc & 0xffff); + *ptr++ = MTCTR(r0); + *ptr++ = BCTRL(); + }; + + auto entry = vm::ptr::make(vm::alloc(branch_size * (start_funcs.size() + 1), vm::main)); + + auto OPD = vm::ptr::make(vm::alloc(2 * 4)); + OPD[0] = entry.addr(); + OPD[1] = 0; + + for (auto &f : start_funcs) + { + make_branch(entry, f); + } + + make_branch(entry, m_ehdr.e_entry); + */ + + ppu_thread(m_ehdr.e_entry, "main_thread").args({ Emu.GetPath()/*, "-emu"*/ }).run(); + + return ok; } - if(shdr.sh_addr + shdr.sh_size > max_addr) + handler::error_code elf64::load_data(u64 offset) { - max_addr = (u32)(shdr.sh_addr + shdr.sh_size); - } + for (auto &phdr : m_phdrs) + { + switch (phdr.p_type) + { + case 
0x00000001: //LOAD + if (phdr.p_memsz) + { + if (!vm::alloc(phdr.p_vaddr.addr(), (u32)phdr.p_memsz, vm::main)) + { + LOG_ERROR(LOADER, "%s(): AllocFixed(0x%llx, 0x%x) failed", __FUNCTION__, phdr.p_vaddr, (u32)phdr.p_memsz); - if((shdr.sh_type == SHT_RELA) || (shdr.sh_type == SHT_REL)) - { - LOG_ERROR(LOADER, "ELF64 ERROR: Relocation"); - continue; - } + return loading_error; + } - switch(shdr.sh_type) - { - case SHT_NOBITS: - //LOG_WARNING(LOADER, "SHT_NOBITS: addr=0x%llx, size=0x%llx", offset + addr, size); - //memset(&Memory[offset + addr], 0, size); - break; + if (phdr.p_filesz) + { + m_stream->Seek(handler::get_stream_offset() + phdr.p_offset); + m_stream->Read(phdr.p_vaddr.get_ptr(), phdr.p_filesz); + Emu.GetSFuncManager().StaticAnalyse(phdr.p_vaddr.get_ptr(), (u32)phdr.p_filesz, phdr.p_vaddr.addr()); + } + } + break; - case SHT_PROGBITS: - //elf64_f.Seek(shdr.sh_offset); - //elf64_f.Read(&Memory[offset + addr], shdr.sh_size); - break; + case 0x00000007: //TLS + Emu.SetTLSData(phdr.p_vaddr.addr(), phdr.p_filesz.value(), phdr.p_memsz.value()); + break; + + case 0x60000001: //LOOS+1 + if (phdr.p_filesz) + { + const sys_process_param& proc_param = *(sys_process_param*)phdr.p_vaddr.get_ptr(); + + if (proc_param.size < sizeof(sys_process_param)) + { + LOG_WARNING(LOADER, "Bad process_param size! [0x%x : 0x%x]", proc_param.size, sizeof(sys_process_param)); + } + if (proc_param.magic != 0x13bcc5f6) + { + LOG_ERROR(LOADER, "Bad process_param magic! 
[0x%x]", proc_param.magic); + } + else + { + sys_process_param_info& info = Emu.GetInfo().GetProcParam(); + /* + LOG_NOTICE(LOADER, "*** sdk version: 0x%x", info.sdk_version.ToLE()); + LOG_NOTICE(LOADER, "*** primary prio: %d", info.primary_prio.ToLE()); + LOG_NOTICE(LOADER, "*** primary stacksize: 0x%x", info.primary_stacksize.ToLE()); + LOG_NOTICE(LOADER, "*** malloc pagesize: 0x%x", info.malloc_pagesize.ToLE()); + LOG_NOTICE(LOADER, "*** ppc seg: 0x%x", info.ppc_seg.ToLE()); + //LOG_NOTICE(LOADER, "*** crash dump param addr: 0x%x", info.crash_dump_param_addr.ToLE()); + */ + + info = proc_param.info; + Emu.m_sdk_version = info.sdk_version; + } + } + break; + + case 0x60000002: //LOOS+2 + if (phdr.p_filesz) + { + const sys_proc_prx_param& proc_prx_param = *(sys_proc_prx_param*)phdr.p_vaddr.get_ptr(); + + if (proc_prx_param.magic != 0x1b434cec) + { + LOG_ERROR(LOADER, "Bad magic! (0x%x)", proc_prx_param.magic.ToLE()); + break; + } + + for (auto stub = proc_prx_param.libstubstart; stub < proc_prx_param.libstubend; ++stub) + { + const std::string module_name = stub->s_modulename.get_ptr(); + Module* module = Emu.GetModuleManager().GetModuleByName(module_name); + if (module) + { + //module->SetLoaded(); + } + else + { + LOG_WARNING(LOADER, "Unknown module '%s'", module_name.c_str()); + } + + static const u32 tbl_section_size = 2 * 4; + static const u32 dst_section_size = 3 * 4; + auto& tbl = ptr::make(alloc(stub->s_imports * tbl_section_size)); + auto& dst = ptr::make(alloc(stub->s_imports * dst_section_size)); + + for (u32 i = 0; i < stub->s_imports; ++i) + { + const u32 nid = *stub->s_nid++; + + if (!Emu.GetModuleManager().get_function_stub(nid, stub->s_text[i])) + { + stub->s_text[i] = tbl.addr(); + + *tbl++ = dst.addr(); + *tbl++ = Emu.GetModuleManager().GetFuncNumById(nid); + + *dst++ = MR(11, 2); + *dst++ = SC(2); + *dst++ = BLR(); + + if (module && !module->Load(nid)) + { + LOG_WARNING(LOADER, "Unimplemented function '%s' in '%s' module (HLE)", 
SysCalls::GetHLEFuncName(nid).c_str(), module_name.c_str()); + } + else //if (Ini.HLELogging.GetValue()) + { + LOG_NOTICE(LOADER, "Imported function '%s' in '%s' module (HLE)", SysCalls::GetHLEFuncName(nid).c_str(), module_name.c_str()); + } + } + else + { + //Is function auto exported, than we can use it + LOG_NOTICE(LOADER, "Imported function '%s' in '%s' module (LLE: 0x%x)", SysCalls::GetHLEFuncName(nid).c_str(), module_name.c_str(), (u32)stub->s_text[i]); + } + } + } + } + break; + } + } + + return ok; } } - - return true; } diff --git a/rpcs3/Loader/ELF64.h b/rpcs3/Loader/ELF64.h index 7843b9f569..a5b2cde70b 100644 --- a/rpcs3/Loader/ELF64.h +++ b/rpcs3/Loader/ELF64.h @@ -1,102 +1,157 @@ #pragma once #include "Loader.h" +#include struct vfsStream; class rFile; -struct Elf64_Ehdr +namespace loader { - u32 e_magic; - u8 e_class; - u8 e_data; - u8 e_curver; - u8 e_os_abi; - u64 e_abi_ver; - u16 e_type; - u16 e_machine; - u32 e_version; - u64 e_entry; - u64 e_phoff; - u64 e_shoff; - u32 e_flags; - u16 e_ehsize; - u16 e_phentsize; - u16 e_phnum; - u16 e_shentsize; - u16 e_shnum; - u16 e_shstrndx; + namespace handlers + { + class elf64 : public handler + { + public: + struct ehdr + { + be_t e_magic; + u8 e_class; + u8 e_data; + u8 e_curver; + u8 e_os_abi; + be_t e_abi_ver; + be_t e_type; + be_t e_machine; + be_t e_version; + be_t e_entry; + be_t e_phoff; + be_t e_shoff; + be_t e_flags; + be_t e_ehsize; + be_t e_phentsize; + be_t e_phnum; + be_t e_shentsize; + be_t e_shnum; + be_t e_shstrndx; - void Load(vfsStream& f); + bool check() const { return e_magic.ToBE() == se32(0x7F454C46); } + } m_ehdr; - void Show(); + struct phdr + { + be_t p_type; + be_t p_flags; + be_t p_offset; + bptr p_vaddr; + bptr p_paddr; + be_t p_filesz; + be_t p_memsz; + be_t p_align; + }; - bool CheckMagic() const { return e_magic == 0x7F454C46; } - u64 GetEntry() const { return e_entry; } -}; + struct shdr + { + be_t sh_name; + be_t sh_type; + be_t sh_flags; + bptr sh_addr; + be_t sh_offset; + 
be_t sh_size; + be_t sh_link; + be_t sh_info; + be_t sh_addralign; + be_t sh_entsize; + }; -struct Elf64_Shdr -{ - u32 sh_name; - u32 sh_type; - u64 sh_flags; - u64 sh_addr; - u64 sh_offset; - u64 sh_size; - u32 sh_link; - u32 sh_info; - u64 sh_addralign; - u64 sh_entsize; + struct sprx_module_info + { + be_t attr; + u8 version[2]; + char name[28]; + be_t toc_addr; + be_t export_start; + be_t export_end; + be_t import_start; + be_t import_end; + } m_sprx_module_info; - void Load(vfsStream& f); + struct sprx_export_info + { + u8 size; + u8 padding; + be_t version; + be_t attr; + be_t func_count; + be_t vars_count; + be_t tls_vars_count; + be_t hash_info; + be_t tls_hash_info; + u8 reserved[2]; + be_t lib_name_offset; + be_t nid_offset; + be_t stub_offset; + }; - void Show(); -}; + struct sprx_import_info + { + u8 size; + u8 unused; + be_t version; + be_t attr; + be_t func_count; + be_t vars_count; + be_t tls_vars_count; + u8 reserved[4]; + be_t lib_name_offset; + be_t nid_offset; + be_t stub_offset; + //... 
+ }; -struct Elf64_Phdr -{ - u32 p_type; - u32 p_flags; - u64 p_offset; - u64 p_vaddr; - u64 p_paddr; - u64 p_filesz; - u64 p_memsz; - u64 p_align; + struct sprx_function_info + { + be_t name_table_offset; + be_t entry_table_offset; + be_t padding; + } m_sprx_function_info; - void Load(vfsStream& f); + struct sprx_lib_info + { + std::string name; + }; - void Show(); -}; + struct sprx_segment_info + { + vm::ptr begin; + u32 size; + u32 size_file; + vm::ptr initial_addr; + std::vector modules; + }; -class ELF64Loader : public LoaderBase -{ - vfsStream& elf64_f; + struct sprx_info + { + std::unordered_map exports; + std::unordered_map imports; + std::vector segments; + }; -public: - Elf64_Ehdr ehdr; - std::vector shdr_name_arr; - std::vector shdr_arr; - std::vector phdr_arr; + std::vector m_phdrs; + std::vector m_shdrs; - ELF64Loader(vfsStream& f); - ~ELF64Loader() {Close();} + std::vector m_sprx_segments_info; + std::vector m_sprx_import_info; + std::vector m_sprx_export_info; - virtual bool LoadInfo(); - virtual bool LoadData(u64 offset = 0); - virtual bool Close(); + public: + virtual ~elf64() = default; - bool LoadEhdrInfo(s64 offset=-1); - bool LoadPhdrInfo(s64 offset=-1); - bool LoadShdrInfo(s64 offset=-1); - -private: - bool LoadEhdrData(u64 offset); - bool LoadPhdrData(u64 offset); - bool LoadShdrData(u64 offset); - - //bool LoadImports(); -}; - -void WriteEhdr(rFile& f, Elf64_Ehdr& ehdr); -void WritePhdr(rFile& f, Elf64_Phdr& phdr); -void WriteShdr(rFile& f, Elf64_Shdr& shdr); + error_code init(vfsStream& stream) override; + error_code load() override; + error_code load_data(u64 offset); + error_code load_sprx(sprx_info& info); + bool is_sprx() const { return m_ehdr.e_type == 0xffa4; } + std::string sprx_get_module_name() const { return m_sprx_module_info.name; } + }; + } +} \ No newline at end of file diff --git a/rpcs3/Loader/Loader.cpp b/rpcs3/Loader/Loader.cpp index ed804f1f40..e2567a60f0 100644 --- a/rpcs3/Loader/Loader.cpp +++ b/rpcs3/Loader/Loader.cpp 
@@ -1,11 +1,38 @@ #include "stdafx.h" #include "Utilities/Log.h" #include "Loader.h" -#include "ELF.h" -#include "SELF.h" #include "PSF.h" #include "Emu/FS/vfsLocalFile.h" +namespace loader +{ + bool loader::load(vfsStream& stream) + { + for (auto i : m_handlers) + { + if (i->init(stream) == handler::ok) + { + if (i->load() == handler::ok) + { + return true; + } + } + + stream.Seek(i->get_stream_offset()); + } + + return false; + } + + handler::error_code handler::init(vfsStream& stream) + { + m_stream_offset = stream.Tell(); + m_stream = &stream; + + return ok; + } +}; + static const u64 g_spu_offset = 0x10000; const std::string Ehdr_DataToString(const u8 data) @@ -13,7 +40,7 @@ const std::string Ehdr_DataToString(const u8 data) if(data > 1) return fmt::Format("%d's complement, big endian", data); if(data < 1) return "Data is not found"; - return fmt::Format("%d's complement, small endian", data); + return fmt::Format("%d's complement, little endian", data); } const std::string Ehdr_TypeToString(const u16 type) @@ -35,7 +62,7 @@ const std::string Ehdr_OS_ABIToString(const u8 os_abi) case 0x66: return "Cell OS LV-2"; }; - return fmt::Format("Unknown (0x%x)", os_abi); + return fmt::Format("Unknown (%x)", os_abi); } const std::string Ehdr_MachineToString(const u16 machine) @@ -48,7 +75,7 @@ const std::string Ehdr_MachineToString(const u16 machine) case MACHINE_ARM: return "ARM"; }; - return fmt::Format("Unknown (0x%x)", machine); + return fmt::Format("Unknown (%x)", machine); } const std::string Phdr_FlagsToString(u32 flags) @@ -73,7 +100,7 @@ const std::string Phdr_FlagsToString(u32 flags) flags &= ~spu << 0x14; flags &= ~rsx << 0x18; - if(flags != 0) return fmt::Format("Unknown %s PPU[0x%x] SPU[0x%x] RSX[0x%x]", ret.c_str(), ppu, spu, rsx); + if(flags != 0) return fmt::Format("Unknown %s PPU[%x] SPU[%x] RSX[%x]", ret.c_str(), ppu, spu, rsx); ret += "PPU[" + FLAGS_TO_STRING(ppu) + "] "; ret += "SPU[" + FLAGS_TO_STRING(spu) + "] "; @@ -93,94 +120,5 @@ const 
std::string Phdr_TypeToString(const u32 type) case 0x60000002: return "LOOS+2"; }; - return fmt::Format("Unknown (0x%x)", type); -} - -Loader::Loader() - : m_stream(nullptr) - , m_loader(nullptr) -{ -} - -Loader::Loader(vfsFileBase& stream) - : m_stream(&stream) - , m_loader(nullptr) -{ -} - -Loader::~Loader() -{ - delete m_loader; - m_loader = nullptr; -} - -void Loader::Open(vfsFileBase& stream) -{ - m_stream = &stream; -} - -LoaderBase* Loader::SearchLoader() -{ - if(!m_stream) - return nullptr; - - LoaderBase* l = new ELFLoader(*m_stream); - if(l->LoadInfo()) - return l; - delete l; - - l = new SELFLoader(*m_stream); - if(l->LoadInfo()) - return l; - delete l; - - return nullptr; -} - -bool Loader::Analyze() -{ - delete m_loader; - - m_loader = SearchLoader(); - - if(!m_loader) - { - LOG_ERROR(LOADER, "Unknown file type"); - return false; - } - - machine = m_loader->GetMachine(); - entry = m_loader->GetMachine() == MACHINE_SPU ? m_loader->GetEntry() + g_spu_offset : m_loader->GetEntry(); - - return true; -} - -bool Loader::Load() -{ - if(!m_loader) - return false; - - if(!m_loader->LoadData(m_loader->GetMachine() == MACHINE_SPU ? 
g_spu_offset : 0)) - { - LOG_ERROR(LOADER, "Broken file"); - return false; - } - - /* - const std::string& root = fmt::ToUTF8(wxFileName(wxFileName(m_stream->GetPath()).GetPath()).GetPath()); - std::string ps3_path; - const std::string& psf_path = root + "/" + "PARAM.SFO"; - vfsFile f(psf_path); - if(f.IsOpened()) - { - PSFLoader psf_l(f); - if(psf_l.Load()) - { - CurGameInfo = psf_l.m_info; - CurGameInfo.root = root; - psf_l.Close(); - } - } - */ - return true; + return fmt::Format("Unknown (%x)", type); } diff --git a/rpcs3/Loader/Loader.h b/rpcs3/Loader/Loader.h index 0f14d38ab9..3e6fbf925a 100644 --- a/rpcs3/Loader/Loader.h +++ b/rpcs3/Loader/Loader.h @@ -1,6 +1,8 @@ #pragma once +#include "Emu/Memory/vm.h" struct vfsFileBase; +struct vfsStream; class rFile; #ifdef _DEBUG @@ -65,22 +67,7 @@ struct sys_process_param sys_process_param_info info; }; -struct sys_proc_prx_param -{ - be_t size; - be_t magic; - be_t version; - be_t pad0; - be_t libentstart; - be_t libentend; - be_t libstubstart; - be_t libstubend; - be_t ver; - be_t pad1; - be_t pad2; -}; - -struct Elf64_StubHeader +struct sys_stub { u8 s_size; // = 0x2c u8 s_unk0; @@ -89,59 +76,80 @@ struct Elf64_StubHeader be_t s_imports; be_t s_unk2; // = 0x0 be_t s_unk3; // = 0x0 - be_t s_modulename; - be_t s_nid; - be_t s_text; + vm::bptr s_modulename; + vm::bptr s_nid; + vm::bptr s_text; be_t s_unk4; // = 0x0 be_t s_unk5; // = 0x0 be_t s_unk6; // = 0x0 be_t s_unk7; // = 0x0 }; -class LoaderBase +struct sys_proc_prx_param { -protected: - u32 entry; - u32 min_addr; - u32 max_addr; - Elf_Machine machine; - - LoaderBase() - : machine(MACHINE_Unknown) - , entry(0) - , min_addr(0) - , max_addr(0) - { - } - -public: - virtual ~LoaderBase() = default; - - virtual bool LoadInfo() { return false; } - virtual bool LoadData(u64 offset = 0) { return false; } - Elf_Machine GetMachine() const { return machine; } - - u32 GetEntry() const { return entry; } - u32 GetMinAddr() const { return min_addr; } - u32 GetMaxAddr() const { 
return max_addr; } + be_t size; + be_t magic; + be_t version; + be_t pad0; + be_t libentstart; + be_t libentend; + vm::bptr libstubstart; + vm::bptr libstubend; + be_t ver; + be_t pad1; + be_t pad2; }; -class Loader : public LoaderBase +namespace loader { - vfsFileBase* m_stream; - LoaderBase* m_loader; + class handler + { + u64 m_stream_offset; -public: - Loader(); - Loader(vfsFileBase& stream); - virtual ~Loader(); + protected: + vfsStream* m_stream; - void Open(const std::string& path); - void Open(vfsFileBase& stream); - bool Analyze(); + public: + enum error_code + { + bad_version = -1, + bad_file = -2, + broken_file = -3, + loading_error = -4, + bad_relocation_type = -5, + ok = 0, + }; - bool Load(); + virtual ~handler() = default; -private: - LoaderBase* SearchLoader(); -}; \ No newline at end of file + virtual error_code init(vfsStream& stream); + virtual error_code load() = 0; + u64 get_stream_offset() const + { + return m_stream_offset; + } + }; + + class loader + { + std::vector m_handlers; + + public: + ~loader() + { + for (auto &h : m_handlers) + { + delete h; + } + } + + void register_handler(handler* handler) + { + m_handlers.push_back(handler); + } + + bool load(vfsStream& stream); + }; + + using namespace vm; +} diff --git a/rpcs3/Loader/SELF.cpp b/rpcs3/Loader/SELF.cpp deleted file mode 100644 index 0d3755cce9..0000000000 --- a/rpcs3/Loader/SELF.cpp +++ /dev/null @@ -1,99 +0,0 @@ -#include "stdafx.h" -#include "Utilities/Log.h" -#include "Emu/FS/vfsStream.h" -#include "SELF.h" -#include "ELF64.h" - -void SceHeader::Load(vfsStream& f) -{ - se_magic = Read32(f); - se_hver = Read32(f); - se_flags = Read16(f); - se_type = Read16(f); - se_meta = Read32(f); - se_hsize = Read64(f); - se_esize = Read64(f); -} - -void SceHeader::Show() -{ - LOG_NOTICE(LOADER, "Magic: %08x", se_magic); - LOG_NOTICE(LOADER, "Class: %s", "SELF"); - LOG_NOTICE(LOADER, "hver: 0x%08x", se_hver); - LOG_NOTICE(LOADER, "flags: 0x%04x", se_flags); - LOG_NOTICE(LOADER, "type: 
0x%04x", se_type); - LOG_NOTICE(LOADER, "meta: 0x%08x", se_meta); - LOG_NOTICE(LOADER, "hsize: 0x%llx", se_hsize); - LOG_NOTICE(LOADER, "esize: 0x%llx", se_esize); -} - -void SelfHeader::Load(vfsStream& f) -{ - se_htype = Read64(f); - se_appinfooff = Read64(f); - se_elfoff = Read64(f); - se_phdroff = Read64(f); - se_shdroff = Read64(f); - se_secinfoff = Read64(f); - se_sceveroff = Read64(f); - se_controloff = Read64(f); - se_controlsize = Read64(f); - pad = Read64(f); -} - -void SelfHeader::Show() -{ - LOG_NOTICE(LOADER, "header type: 0x%llx", se_htype); - LOG_NOTICE(LOADER, "app info offset: 0x%llx", se_appinfooff); - LOG_NOTICE(LOADER, "elf offset: 0x%llx", se_elfoff); - LOG_NOTICE(LOADER, "program header offset: 0x%llx", se_phdroff); - LOG_NOTICE(LOADER, "section header offset: 0x%llx", se_shdroff); - LOG_NOTICE(LOADER, "section info offset: 0x%llx", se_secinfoff); - LOG_NOTICE(LOADER, "sce version offset: 0x%llx", se_sceveroff); - LOG_NOTICE(LOADER, "control info offset: 0x%llx", se_controloff); - LOG_NOTICE(LOADER, "control info size: 0x%llx", se_controlsize); -} - -SELFLoader::SELFLoader(vfsStream& f) - : self_f(f) - , LoaderBase() -{ -} - -bool SELFLoader::LoadInfo() -{ - if(!self_f.IsOpened()) return false; - self_f.Seek(0); - sce_hdr.Load(self_f); - self_hdr.Load(self_f); - if(!sce_hdr.CheckMagic()) return false; - - return true; -} - -bool SELFLoader::LoadData(u64 offset) -{ - if(!self_f.IsOpened()) return false; - - sce_hdr.Show(); - self_hdr.Show(); - - ELF64Loader l(self_f); - if( !l.LoadEhdrInfo(self_hdr.se_elfoff) || - !l.LoadPhdrInfo(self_hdr.se_phdroff) || - !l.LoadShdrInfo(self_hdr.se_shdroff) || - !l.LoadData(self_hdr.se_appinfooff) ) - { - LOG_ERROR(LOADER, "Broken SELF file."); - - return false; - } - - machine = l.GetMachine(); - entry = l.GetEntry(); - - return true; - - LOG_ERROR(LOADER, "Boot SELF not supported yet!"); - return false; -} \ No newline at end of file diff --git a/rpcs3/Loader/SELF.h b/rpcs3/Loader/SELF.h deleted file mode 
100644 index 3c4e7b9fbe..0000000000 --- a/rpcs3/Loader/SELF.h +++ /dev/null @@ -1,53 +0,0 @@ -#pragma once -#include "Loader.h" - -struct vfsStream; - -struct SceHeader -{ - u32 se_magic; - u32 se_hver; - u16 se_flags; - u16 se_type; - u32 se_meta; - u64 se_hsize; - u64 se_esize; - - void Load(vfsStream& f); - - void Show(); - - bool CheckMagic() const { return se_magic == 0x53434500; } -}; - -struct SelfHeader -{ - u64 se_htype; - u64 se_appinfooff; - u64 se_elfoff; - u64 se_phdroff; - u64 se_shdroff; - u64 se_secinfoff; - u64 se_sceveroff; - u64 se_controloff; - u64 se_controlsize; - u64 pad; - - void Load(vfsStream& f); - - void Show(); -}; - -class SELFLoader : public LoaderBase -{ - vfsStream& self_f; - - SceHeader sce_hdr; - SelfHeader self_hdr; - -public: - SELFLoader(vfsStream& f); - - virtual bool LoadInfo(); - virtual bool LoadData(u64 offset = 0); -}; \ No newline at end of file diff --git a/rpcs3/Loader/TROPUSR.cpp b/rpcs3/Loader/TROPUSR.cpp index 6569ae3fd3..7d6b7fb655 100644 --- a/rpcs3/Loader/TROPUSR.cpp +++ b/rpcs3/Loader/TROPUSR.cpp @@ -133,7 +133,7 @@ bool TROPUSRLoader::Generate(const std::string& filepath, const std::string& con { if (n->GetName() == "trophy") { - u32 trophy_id = atoi(n->GetAttribute("id").c_str()); + u32 trophy_id = std::atoi(n->GetAttribute("id").c_str()); u32 trophy_grade; switch (((const char *)n->GetAttribute("ttype").c_str())[0]) { diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 2da63c0f47..969839a54d 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -239,13 +239,11 @@ - - @@ -459,13 +457,11 @@ - - diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index b2ee29df86..05645dd26e 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -356,9 +356,6 @@ Emu\Memory - - Loader - Loader @@ -374,9 +371,6 @@ Loader - - Loader - Loader @@ -988,9 +982,6 @@ Emu\Memory - - Loader - Loader @@ -1006,9 +997,6 @@ Loader - - Loader - Loader diff --git 
a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj index e692156a04..946b6cb66e 100644 --- a/rpcs3/rpcs3.vcxproj +++ b/rpcs3/rpcs3.vcxproj @@ -173,6 +173,7 @@ + @@ -212,6 +213,7 @@ + diff --git a/rpcs3/rpcs3.vcxproj.filters b/rpcs3/rpcs3.vcxproj.filters index 18a836398f..5fce1aa362 100644 --- a/rpcs3/rpcs3.vcxproj.filters +++ b/rpcs3/rpcs3.vcxproj.filters @@ -90,6 +90,9 @@ Gui + + Gui + @@ -185,5 +188,8 @@ Gui + + Gui + \ No newline at end of file diff --git a/rpcs3/stdafx.h b/rpcs3/stdafx.h index 88e422e2a3..2ca972bc49 100644 --- a/rpcs3/stdafx.h +++ b/rpcs3/stdafx.h @@ -50,7 +50,10 @@ typedef int16_t s16; typedef int32_t s32; typedef int64_t s64; -#define AlignAddr(addr, align) (((addr) + ((align) - 1)) & ~((align) - 1)) +template __forceinline T align(const T addr, int align) +{ + return (addr + (align - 1)) & ~(align - 1); +} #include "Utilities/StrFmt.h" #include "Utilities/BEType.h" From 06f3df0ff465d77b5f52a3a0563474a16d63ffac Mon Sep 17 00:00:00 2001 From: raven02 Date: Wed, 19 Nov 2014 23:17:29 +0800 Subject: [PATCH 25/27] Check contentInfoPath && usrdirPath is not null for cellGameContentPermit --- rpcs3/Emu/SysCalls/Modules/cellGame.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rpcs3/Emu/SysCalls/Modules/cellGame.cpp b/rpcs3/Emu/SysCalls/Modules/cellGame.cpp index 2e320f6a49..e8b8ee0cb6 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellGame.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellGame.cpp @@ -180,6 +180,12 @@ int cellGameContentPermit(vm::ptr contentInfoPath, vm: { cellGame->Warning("cellGameContentPermit(contentInfoPath_addr=0x%x, usrdirPath_addr=0x%x)", contentInfoPath.addr(), usrdirPath.addr()); + + if (!contentInfoPath && !usrdirPath) + { + cellGame->Warning("cellGameContentPermit(): CELL_GAME_ERROR_PARAM"); + return CELL_GAME_ERROR_PARAM; + } if (contentInfo == "" && usrdir == "") { From cdfef3bf9e27021dace0ff45c9104ed5e834188b Mon Sep 17 00:00:00 2001 From: DHrpcs3 Date: Thu, 20 Nov 2014 21:41:04 +0200 Subject: [PATCH 26/27] Fixed compilation 
errors Implemented converter_le_be --- Utilities/BEType.h | 73 +++++++-- rpcs3/Emu/CPU/CPUThread.h | 6 +- rpcs3/Emu/Cell/PPCThread.h | 2 +- rpcs3/Emu/Cell/PPUInstrTable.h | 4 +- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 6 +- rpcs3/Emu/Cell/PPUThread.cpp | 48 +++++- rpcs3/Emu/Cell/PPUThread.h | 37 +---- rpcs3/Emu/Memory/vm.cpp | 2 +- rpcs3/Emu/Memory/vm.h | 12 -- rpcs3/Emu/Memory/vm_ptr.h | 26 ++-- rpcs3/Emu/SysCalls/Modules.cpp | 2 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 2 +- rpcs3/Emu/SysCalls/Modules/cellSync.cpp | 10 +- rpcs3/Emu/SysCalls/lv2/sys_lwcond.cpp | 2 +- rpcs3/Emu/SysCalls/lv2/sys_spu.h | 2 +- rpcs3/Emu/System.h | 8 +- rpcs3/Loader/ELF64.cpp | 7 +- rpcs3/emucore.vcxproj.filters | 183 ++++++++++++----------- 18 files changed, 248 insertions(+), 184 deletions(-) diff --git a/Utilities/BEType.h b/Utilities/BEType.h index cfe3dc5b0f..fbd18f5089 100644 --- a/Utilities/BEType.h +++ b/Utilities/BEType.h @@ -490,14 +490,14 @@ private: }; public: - const T& ToBE() const + const type& ToBE() const { return m_data; } type ToLE() const { - return se_t::func(m_data); + return se_t::func(m_data); } void FromBE(const type& value) @@ -507,7 +507,7 @@ public: void FromLE(const type& value) { - m_data = se_t::func(value); + m_data = se_t::func(value); } static be_t MakeFromLE(const type value) @@ -533,14 +533,14 @@ public: return is_le_machine ? ToLE() : ToBE(); } - be_t() = default; - be_t(const be_t& value) = default; - - be_t(type value) - { - m_data = se_t::func(value); - } + //be_t() = default; + //be_t(const be_t& value) = default; + //be_t(type value) + //{ + // m_data = se_t::func(value); + //} + be_t& operator = (const be_t& value) = default; be_t& operator = (type value) @@ -558,7 +558,8 @@ public: template operator const be_t() const { - return _convert sizeof(T)) ? 1 : (sizeof(T1) < sizeof(T) ? 2 : 0))>::func(m_data); + return be_t::make(value()); + //return _convert sizeof(T)) ? 1 : (sizeof(T1) < sizeof(T) ? 
2 : 0))>::func(m_data); } template be_t& operator += (T1 right) { return *this = T(*this) + right; } @@ -644,6 +645,8 @@ public: //be_t if need swap endianes, T otherwise typedef typename _be_type_selector< T, T2, value >::type type; + + typedef typename _be_type_selector< T, T2, !is_be_t::value >::type forced_type; }; template @@ -771,3 +774,51 @@ template __forceinline static void Write64(T& f, const u64 data) { Write64LE(f, re64(data)); } + +template +struct convert_le_be_t +{ + static Tto func(Tfrom&& value) + { + return (Tto)value; + } +}; + +template +struct convert_le_be_t, Tfrom> +{ + static be_t func(Tfrom&& value) + { + return be_t::make(value); + } +}; + +template +struct convert_le_be_t, be_t> +{ + static be_t func(be_t&& value) + { + return value; + } +}; + +template +struct convert_le_be_t> +{ + static Tto func(be_t&& value) + { + return value.value(); + } +}; + +template +__forceinline Tto convert_le_be(Tfrom&& value) +{ + return convert_le_be_t::func(value); +} + +template +__forceinline void convert_le_be(Tto& dst, Tfrom&& src) +{ + dst = convert_le_be_t::func(src) +} \ No newline at end of file diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h index d5b2a94eb1..e43b78fe89 100644 --- a/rpcs3/Emu/CPU/CPUThread.h +++ b/rpcs3/Emu/CPU/CPUThread.h @@ -228,13 +228,13 @@ public: m_call_stack.erase((res + 1).base(), m_call_stack.end()); return; } - + //add a new entry otherwise CallStackItem new_item; - + new_item.branch_pc = pc; new_item.pc = PC; - + m_call_stack.push_back(new_item); } diff --git a/rpcs3/Emu/Cell/PPCThread.h b/rpcs3/Emu/Cell/PPCThread.h index f06cf4eaee..c7e5dab93c 100644 --- a/rpcs3/Emu/Cell/PPCThread.h +++ b/rpcs3/Emu/Cell/PPCThread.h @@ -9,7 +9,7 @@ public: virtual std::string GetThreadName() const { - return (GetFName() + fmt::Format("[0x%08llx]", PC)); + return (GetFName() + fmt::Format("[0x%08x]", PC)); } protected: diff --git a/rpcs3/Emu/Cell/PPUInstrTable.h b/rpcs3/Emu/Cell/PPUInstrTable.h index 
250311321e..676f32fa79 100644 --- a/rpcs3/Emu/Cell/PPUInstrTable.h +++ b/rpcs3/Emu/Cell/PPUInstrTable.h @@ -643,8 +643,8 @@ namespace PPU_instr { using namespace lists; - static auto LIS = std::bind(ADDIS, std::placeholders::_1, r0, std::placeholders::_2); - static auto LI = std::bind(ADDI, std::placeholders::_1, r0, std::placeholders::_2); + //static auto LIS = std::bind(ADDIS, std::placeholders::_1, r0, std::placeholders::_2); + //static auto LI = std::bind(ADDI, std::placeholders::_1, r0, std::placeholders::_2); static auto NOP = std::bind(ORI, r0, r0, 0); static auto MR = std::bind(OR, std::placeholders::_1, std::placeholders::_2, std::placeholders::_2, false); static auto BLR = std::bind(BCLR, 0x10 | 0x04, 0, 0, 0); diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index aad5ca6d0f..07354dd068 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -4479,14 +4479,14 @@ void Compiler::SetXerSo(Value * so) { } Value * Compiler::GetUsprg0() { - auto usrpg0_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, USPRG0)); + auto usrpg0_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, USPRG)); auto usprg0_i64_ptr = m_ir_builder->CreateBitCast(usrpg0_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); return m_ir_builder->CreateAlignedLoad(usprg0_i64_ptr, 8); } void Compiler::SetUsprg0(Value * val_x64) { auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - auto usprg0_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, USPRG0)); + auto usprg0_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, USPRG)); auto usprg0_i64_ptr = m_ir_builder->CreateBitCast(usprg0_i8_ptr, 
m_ir_builder->getInt64Ty()->getPointerTo()); m_ir_builder->CreateAlignedStore(val_i64, usprg0_i64_ptr, 8); } @@ -4776,7 +4776,7 @@ void Compiler::WriteMemory(Value * addr_i64, Value * val_ix, u32 alignment, bool val_else_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt32Ty()), val_else_i32); } - Call("vm.write32", (void(*)(u64, u32))vm::write32, addr_i64, val_else_i32); + Call("vm.write32", (void(*)(u32, u32))vm::write32, addr_i64, val_else_i32); m_ir_builder->CreateBr(merge_bb); m_ir_builder->SetInsertPoint(merge_bb); diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index e8189828a6..06b2c8b926 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -88,8 +88,8 @@ void PPUThread::InitRegs() GPR[1] = align(m_stack_addr + m_stack_size, 0x200) - 0x200; GPR[2] = rtoc; - GPR[11] = entry; - GPR[12] = Emu.GetMallocPageSize(); + //GPR[11] = entry; + //GPR[12] = Emu.GetMallocPageSize(); GPR[13] = Memory.PRXMem.GetStartAddr() + 0x7060; LR = Emu.GetPPUThreadExit(); @@ -244,4 +244,48 @@ ppu_thread::ppu_thread(u32 entry, const std::string& name, u32 stack_size, u32 p thread->SetPrio(prio ? 
prio : Emu.GetInfo().GetProcParam().primary_prio); argc = 0; +} + +cpu_thread& ppu_thread::args(std::initializer_list values) +{ + if (!values.size()) + return *this; + + assert(argc == 0); + + envp.set(vm::alloc(align((u32)sizeof(*envp), stack_align), vm::main)); + *envp = 0; + argv.set(vm::alloc(sizeof(*argv) * values.size(), vm::main)); + + for (auto &arg : values) + { + u32 arg_size = align(u32(arg.size() + 1), stack_align); + u32 arg_addr = vm::alloc(arg_size, vm::main); + + std::strcpy(vm::get_ptr(arg_addr), arg.c_str()); + + argv[argc++] = arg_addr; + } + + return *this; +} + +cpu_thread& ppu_thread::run() +{ + thread->Run(); + + gpr(3, argc); + gpr(4, argv.addr()); + gpr(5, envp.addr()); + + return *this; +} + +ppu_thread& ppu_thread::gpr(uint index, u64 value) +{ + assert(index < 32); + + static_cast(thread)->GPR[index] = value; + + return *this; } \ No newline at end of file diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index e7f50f81e6..717dd151f1 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -808,38 +808,7 @@ class ppu_thread : cpu_thread public: ppu_thread(u32 entry, const std::string& name = "", u32 stack_size = 0, u32 prio = 0); - cpu_thread& args(std::initializer_list values) override - { - if (!values.size()) - return *this; - - assert(argc == 0); - - envp.set(vm::alloc(align((u32)sizeof(*envp), stack_align), vm::main)); - *envp = 0; - argv.set(vm::alloc(sizeof(*argv) * values.size(), vm::main)); - - for (auto &arg : values) - { - u32 arg_size = align(u32(arg.size() + 1), stack_align); - u32 arg_addr = vm::alloc(arg_size, vm::main); - - std::strcpy(vm::get_ptr(arg_addr), arg.c_str()); - - argv[argc++] = arg_addr; - } - - return *this; - } - - cpu_thread& run() override - { - thread->Run(); - - static_cast(thread)->GPR[3] = argc; - static_cast(thread)->GPR[4] = argv.addr(); - static_cast(thread)->GPR[5] = envp.addr(); - - return *this; - } + cpu_thread& args(std::initializer_list values) override; 
+ cpu_thread& run() override; + ppu_thread& gpr(uint index, u64 value); }; \ No newline at end of file diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp index 95e879e6bb..5e274ea22f 100644 --- a/rpcs3/Emu/Memory/vm.cpp +++ b/rpcs3/Emu/Memory/vm.cpp @@ -14,7 +14,7 @@ namespace vm #define MAP_ANONYMOUS MAP_ANON #endif - void* const g_base_addr = ::mmap(nullptr, 0x100000000, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + void* const g_base_addr = mmap(nullptr, 0x100000000, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); #endif bool check_addr(u32 addr) diff --git a/rpcs3/Emu/Memory/vm.h b/rpcs3/Emu/Memory/vm.h index 75a9052412..ac8c1dd12e 100644 --- a/rpcs3/Emu/Memory/vm.h +++ b/rpcs3/Emu/Memory/vm.h @@ -43,24 +43,12 @@ namespace vm return (T*)((u8*)g_base_addr + addr); } - template - T* const get_ptr(u64 addr) - { - return get_ptr((u32)addr); - } - template T& get_ref(u32 addr) { return *get_ptr(addr); } - template - T& get_ref(u64 addr) - { - return get_ref((u32)addr); - } - namespace ps3 { void init(); diff --git a/rpcs3/Emu/Memory/vm_ptr.h b/rpcs3/Emu/Memory/vm_ptr.h index c984b18e0a..f4bb39ac8c 100644 --- a/rpcs3/Emu/Memory/vm_ptr.h +++ b/rpcs3/Emu/Memory/vm_ptr.h @@ -80,7 +80,7 @@ namespace vm template operator const _ptr_base() const { - typename std::remove_const::type addr = m_addr; + typename std::remove_const::type addr = convert_le_be(m_addr); return (_ptr_base&)addr; } @@ -163,7 +163,12 @@ namespace vm return vm::get_ref(m_addr); } - __forceinline T& operator [](AT index) const + __forceinline T& operator [](typename remove_be_t::type index) const + { + return vm::get_ref(m_addr + sizeof(AT)* index); + } + + __forceinline T& operator [](typename to_be_t::forced_type index) const { return vm::get_ref(m_addr + sizeof(AT)* index); } @@ -196,9 +201,10 @@ namespace vm return m_addr; } - void set(AT value) + template + void set(T&& value) { - m_addr = value; + m_addr = convert_le_be(value); } /* @@ -212,7 +218,7 @@ namespace vm template operator 
const _ptr_base() const { - typename std::remove_const::type addr = m_addr; + typename std::remove_const::type addr = convert_le_be(m_addr); return (_ptr_base&)addr; } @@ -270,14 +276,14 @@ namespace vm template operator const _ptr_base() const { - typename std::remove_const::type addr = m_addr; + typename std::remove_const::type addr = convert_le_be(m_addr); return (_ptr_base&)addr; } template operator const _ptr_base() const { - typename std::remove_const::type addr = m_addr; + typename std::remove_const::type addr = convert_le_be(m_addr); return (_ptr_base&)addr; } @@ -330,7 +336,7 @@ namespace vm template operator const _ptr_base() const { - typename std::remove_const::type addr; addr = m_addr; + typename std::remove_const::type addr = convert_le_be(m_addr); return (_ptr_base&)addr; } @@ -379,7 +385,7 @@ namespace vm template operator const _ptr_base() const { - typename std::remove_const::type addr = m_addr; + typename std::remove_const::type addr = convert_le_be(m_addr); return (_ptr_base&)addr; } @@ -390,7 +396,7 @@ namespace vm operator const std::function() const { - typename std::remove_const::type addr = m_addr; + typename std::remove_const::type addr = convert_le_be(m_addr); return [addr](T... 
args) -> RT { return make(addr)(args...); }; } diff --git a/rpcs3/Emu/SysCalls/Modules.cpp b/rpcs3/Emu/SysCalls/Modules.cpp index 7951808e2a..e74349e7ae 100644 --- a/rpcs3/Emu/SysCalls/Modules.cpp +++ b/rpcs3/Emu/SysCalls/Modules.cpp @@ -179,7 +179,7 @@ void fix_import(Module* module, u32 func, u32 addr) vm::ptr& ptr = (vm::ptr&)addr; - *ptr++ = LIS(11, func >> 16); + *ptr++ = ADDIS(11, 0, func >> 16); *ptr++ = ORI(11, 11, func & 0xffff); *ptr++ = NOP(); ++ptr; diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 9a293f5476..997cd8c5f8 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -1691,7 +1691,7 @@ s64 cellSpursGetWorkloadFlag(vm::ptr spurs, vm::ptrset(be_t::make(Memory.RealToVirtualAddr(&spurs->m.wklFlag))); + flag->set(Memory.RealToVirtualAddr(&spurs->m.wklFlag)); return CELL_OK; } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSync.cpp b/rpcs3/Emu/SysCalls/Modules/cellSync.cpp index e77004872a..fd68456b2c 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSync.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSync.cpp @@ -290,7 +290,7 @@ s32 syncRwmInitialize(vm::ptr rwm, vm::ptr buffer, u32 buffer } // prx: zeroize first u16 and second u16, write buffer_size in second u32, write buffer_addr in second u64 and sync - rwm->m_size = buffer_size; + rwm->m_size = be_t::make(buffer_size); rwm->m_buffer = buffer; rwm->data.exchange({}); return CELL_OK; @@ -479,9 +479,9 @@ s32 syncQueueInitialize(vm::ptr queue, vm::ptr buffer, u32 si } // prx: zeroize first u64, write size in third u32, write depth in fourth u32, write address in third u64 and sync - queue->m_size = size; - queue->m_depth = depth; - queue->m_buffer = buffer; + queue->m_size = be_t::make(size); + queue->m_depth = be_t::make(depth); + queue->m_buffer.set(buffer.addr()); queue->data.exchange({}); return CELL_OK; } @@ -855,7 +855,7 @@ void syncLFQueueInit(vm::ptr queue, vm::ptr buffer, u32 siz { 
queue->pop1.write_relaxed({}); queue->push1.write_relaxed({}); - queue->m_buffer.set(queue->m_buffer.addr() | be_t::make(1)); + queue->m_buffer.set(queue->m_buffer.addr() | 1); queue->m_bs[0] = -1; queue->m_bs[1] = -1; //m_bs[2] diff --git a/rpcs3/Emu/SysCalls/lv2/sys_lwcond.cpp b/rpcs3/Emu/SysCalls/lv2/sys_lwcond.cpp index 9c709ba37f..a7d277977b 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_lwcond.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_lwcond.cpp @@ -15,7 +15,7 @@ s32 lwcond_create(sys_lwcond_t& lwcond, sys_lwmutex_t& lwmutex, u64 name_u64) u32 id = sys_lwcond.GetNewId(new Lwcond(name_u64), TYPE_LWCOND); u32 addr = Memory.RealToVirtualAddr(&lwmutex); - lwcond.lwmutex.set(be_t::make(addr)); + lwcond.lwmutex.set(addr); lwcond.lwcond_queue = id; std::string name((const char*)&name_u64, 8); diff --git a/rpcs3/Emu/SysCalls/lv2/sys_spu.h b/rpcs3/Emu/SysCalls/lv2/sys_spu.h index a07e6d0ca3..6cf66a7bcb 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_spu.h +++ b/rpcs3/Emu/SysCalls/lv2/sys_spu.h @@ -90,7 +90,7 @@ struct sys_spu_segment union { - be_t addr; // address or fill value + be_t addr; // address or fill value u64 pad; }; }; diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h index 7a7aacff96..efabecda1f 100644 --- a/rpcs3/Emu/System.h +++ b/rpcs3/Emu/System.h @@ -45,10 +45,10 @@ public: SetTLSData(0, 0, 0); memset(&proc_param, 0, sizeof(sys_process_param_info)); - proc_param.malloc_pagesize = 0x100000; - proc_param.sdk_version = 0x360001; - proc_param.primary_stacksize = 0x100000; - proc_param.primary_prio = 0x50; + proc_param.malloc_pagesize = be_t::make(0x100000); + proc_param.sdk_version = be_t::make(0x360001); + proc_param.primary_stacksize = be_t::make(0x100000); + proc_param.primary_prio = be_t::make(0x50); } void SetTLSData(const u64 addr, const u64 filesz, const u64 memsz) diff --git a/rpcs3/Loader/ELF64.cpp b/rpcs3/Loader/ELF64.cpp index 40e2b80a4d..3c497e20f0 100644 --- a/rpcs3/Loader/ELF64.cpp +++ b/rpcs3/Loader/ELF64.cpp @@ -323,7 +323,7 @@ namespace loader 
ppu_thr_stop_data[1] = BLR(); Emu.SetPPUThreadStop(ppu_thr_stop_data.addr()); - //vm::write64(Memory.PRXMem.AllocAlign(0x10000), 0xDEADBEEFABADCAFE); + vm::write64(Memory.PRXMem.AllocAlign(0x10000), 0xDEADBEEFABADCAFE); /* //TODO static const int branch_size = 6 * 4; @@ -354,7 +354,10 @@ namespace loader make_branch(entry, m_ehdr.e_entry); */ - ppu_thread(m_ehdr.e_entry, "main_thread").args({ Emu.GetPath()/*, "-emu"*/ }).run(); + ppu_thread main_thread(m_ehdr.e_entry, "main_thread"); + + main_thread.args({ Emu.GetPath()/*, "-emu"*/ }).run(); + main_thread.gpr(11, m_ehdr.e_entry).gpr(12, Emu.GetMallocPageSize()); return ok; } diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 05645dd26e..d4cec623e0 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -33,15 +33,9 @@ {28902cf4-4fa6-428b-ab94-6b410fd5077f} - - {13d20086-2188-425a-9856-0440fe6f79f2} - {037247b4-0370-4193-a25d-bc9d075bf0a7} - - {93b1cff1-0158-4327-a437-e9abcac8d724} - {5a18e5b1-2632-4849-ba94-e7a2ea0b78fa} @@ -60,24 +54,33 @@ {84c34dd1-4c49-4ecf-8ee2-4165c14f24be} - - {fadb4b36-57af-4583-891d-d22ff369e266} - - - {4adca4fa-b90f-4662-9eb0-1d29cf3cd2eb} - - - {6f1da5b2-52c5-416b-9b5c-b9897bc1b300} - {fcac6852-b45f-4cf2-afee-cf56bcea14e5} {ead7494f-a872-4b4d-a864-1a61c3b6012f} - + + {13d20086-2188-425a-9856-0440fe6f79f2} + + + {93b1cff1-0158-4327-a437-e9abcac8d724} + + {1d9e6fc4-9a79-4329-a8b5-081e24822aaa} + + {6674e2ab-90cd-47de-a852-d21643ab18c2} + + + {fadb4b36-57af-4583-891d-d22ff369e266} + + + {4adca4fa-b90f-4662-9eb0-1d29cf3cd2eb} + + + {6f1da5b2-52c5-416b-9b5c-b9897bc1b300} + @@ -309,28 +312,28 @@ Emu\HDD - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell Emu\CPU @@ -339,7 +342,7 @@ Emu\CPU - Emu\ARMv7 + Emu\CPU\ARMv7 Emu\Audio @@ -555,37 +558,37 @@ Emu\SysCalls\Modules - Emu\RSX\GL + 
Emu\GPU\RSX\GL - Emu\RSX\GL + Emu\GPU\RSX\GL - Emu\RSX\GL + Emu\GPU\RSX\GL - Emu\RSX\GL + Emu\GPU\RSX\GL - Emu\RSX\GL + Emu\GPU\RSX\GL - Emu\RSX\GL + Emu\GPU\RSX\GL - Emu\RSX\GL + Emu\GPU\RSX\GL - Emu\RSX + Emu\GPU\RSX - Emu\RSX + Emu\GPU\RSX - Emu\RSX + Emu\GPU\RSX - Emu\RSX + Emu\GPU\RSX Emu\SysCalls\lv2 @@ -624,31 +627,31 @@ Emu\SysCalls\Modules - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\ARMv7 + Emu\CPU\ARMv7 - Emu\ARMv7 + Emu\CPU\ARMv7 - Emu\ARMv7 + Emu\CPU\ARMv7 - Emu\ARMv7\Modules + Emu\CPU\ARMv7\Modules - Emu\ARMv7\Modules + Emu\CPU\ARMv7\Modules - Emu\ARMv7\Modules + Emu\CPU\ARMv7\Modules - Emu\ARMv7\Modules + Emu\CPU\ARMv7\Modules @@ -878,64 +881,64 @@ Emu\HDD - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell - Emu\Cell + Emu\CPU\Cell Emu\CPU @@ -953,19 +956,19 @@ Emu\CPU - Emu\ARMv7 + Emu\CPU\ARMv7 - Emu\ARMv7 + Emu\CPU\ARMv7 - Emu\ARMv7 + Emu\CPU\ARMv7 - Emu\ARMv7 + Emu\CPU\ARMv7 - Emu\ARMv7 + Emu\CPU\ARMv7 Emu\Audio @@ -1121,58 +1124,58 @@ Emu\SysCalls - Emu\RSX\Null + Emu\GPU\RSX\Null - Emu\RSX\GL + Emu\GPU\RSX\GL - Emu\RSX\GL + Emu\GPU\RSX\GL - Emu\RSX\GL + Emu\GPU\RSX\GL - Emu\RSX\GL + Emu\GPU\RSX\GL - Emu\RSX\GL + Emu\GPU\RSX\GL - Emu\RSX\GL + Emu\GPU\RSX\GL - Emu\RSX\GL + Emu\GPU\RSX\GL - Emu\RSX\GL + Emu\GPU\RSX\GL - Emu\RSX\GL + Emu\GPU\RSX\GL - Emu\RSX + Emu\GPU\RSX - Emu\RSX + Emu\GPU\RSX - Emu\RSX + Emu\GPU\RSX - Emu\RSX + Emu\GPU\RSX - Emu\RSX + Emu\GPU\RSX - Emu\RSX + Emu\GPU\RSX - Emu\RSX + Emu\GPU\RSX - Emu\RSX + Emu\GPU\RSX Emu\SysCalls\lv2 @@ -1253,10 +1256,10 @@ Emu\SysCalls\Modules - 
Emu\Cell + Emu\CPU\Cell - Emu\ARMv7 + Emu\CPU\ARMv7 \ No newline at end of file From 16b4b9bf40e0165bafc6f20196ffed857e3ba7ce Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Fri, 21 Nov 2014 15:52:01 +0200 Subject: [PATCH 27/27] Fix Travis compiling --- Utilities/BEType.h | 2 +- rpcs3/Emu/CPU/CPUThread.h | 2 +- rpcs3/Emu/Cell/PPUInterpreter.h | 4 ++-- rpcs3/Emu/Memory/vm_ptr.h | 4 ++-- rpcs3/Emu/SysCalls/Modules/cellDmux.cpp | 2 +- rpcs3/Emu/SysCalls/lv2/sys_spu.cpp | 2 +- rpcs3/Emu/System.h | 2 +- rpcs3/Loader/ELF32.cpp | 2 +- rpcs3/Loader/ELF64.cpp | 10 +++++----- 9 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Utilities/BEType.h b/Utilities/BEType.h index fbd18f5089..768e25770c 100644 --- a/Utilities/BEType.h +++ b/Utilities/BEType.h @@ -820,5 +820,5 @@ __forceinline Tto convert_le_be(Tfrom&& value) template __forceinline void convert_le_be(Tto& dst, Tfrom&& src) { - dst = convert_le_be_t::func(src) + dst = convert_le_be_t::func(src); } \ No newline at end of file diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h index e43b78fe89..6bfbc0e8d4 100644 --- a/rpcs3/Emu/CPU/CPUThread.h +++ b/rpcs3/Emu/CPU/CPUThread.h @@ -292,6 +292,6 @@ public: u32 get_id() const { - thread->GetId(); + return thread->GetId(); } }; \ No newline at end of file diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 90dc8ccbf5..c24509ea0f 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -187,7 +187,7 @@ private: case 0x104: case 0x105: case 0x106: - case 0x107: CPU.USPRG[n - 0x100] = value; break; + case 0x107: CPU.USPRG[n - 0x100] = value; return; case 0x10C: UNK("WriteSPR: Write to time-based SPR. 
Report this to a developer!"); return; @@ -198,7 +198,7 @@ private: case 0x114: case 0x115: case 0x116: - case 0x117: CPU.SPRG[n - 0x110] = value; break; + case 0x117: CPU.SPRG[n - 0x110] = value; return; } UNK(fmt::Format("WriteSPR error: Unknown SPR 0x%x!", n)); diff --git a/rpcs3/Emu/Memory/vm_ptr.h b/rpcs3/Emu/Memory/vm_ptr.h index f4bb39ac8c..223bda5243 100644 --- a/rpcs3/Emu/Memory/vm_ptr.h +++ b/rpcs3/Emu/Memory/vm_ptr.h @@ -201,8 +201,8 @@ namespace vm return m_addr; } - template - void set(T&& value) + template + void set(U&& value) { m_addr = convert_le_be(value); } diff --git a/rpcs3/Emu/SysCalls/Modules/cellDmux.cpp b/rpcs3/Emu/SysCalls/Modules/cellDmux.cpp index 116169406e..5d4acc0f30 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellDmux.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellDmux.cpp @@ -900,7 +900,7 @@ int cellDmuxEnableEs(u32 demuxerHandle, vm::ptr esFil *esHandle = id; cellDmux->Warning("*** New ES(dmux=%d, addr=0x%x, size=0x%x, filter(0x%x, 0x%x, 0x%x, 0x%x), cb=0x%x(arg=0x%x), spec=0x%x): id = %d", - demuxerHandle, es->memAddr, es->memSize, es->fidMajor, es->fidMinor, es->sup1, es->sup2, (u32)esCb->cbEsMsgFunc, es->cbArg, es->spec, id); + demuxerHandle, es->memAddr, es->memSize, es->fidMajor, es->fidMinor, es->sup1, es->sup2, esCb->cbEsMsgFunc.addr(), es->cbArg, es->spec, id); DemuxerTask task(dmuxEnableEs); task.es.es = id; diff --git a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp index 1f6a00bbdf..4a4a1a5523 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp @@ -7,7 +7,7 @@ #include "Emu/Cell/RawSPUThread.h" #include "Emu/FS/vfsStreamMemory.h" #include "Emu/FS/vfsFile.h" -#include "Loader/elf32.h" +#include "Loader/ELF32.h" #include "Crypto/unself.h" #include "sys_spu.h" diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h index efabecda1f..63469f3467 100644 --- a/rpcs3/Emu/System.h +++ b/rpcs3/Emu/System.h @@ -48,7 +48,7 @@ public: proc_param.malloc_pagesize = be_t::make(0x100000); 
proc_param.sdk_version = be_t::make(0x360001); proc_param.primary_stacksize = be_t::make(0x100000); - proc_param.primary_prio = be_t::make(0x50); + proc_param.primary_prio = be_t::make(0x50); } void SetTLSData(const u64 addr, const u64 filesz, const u64 memsz) diff --git a/rpcs3/Loader/ELF32.cpp b/rpcs3/Loader/ELF32.cpp index aa8d7d2dd3..377ab7f5ae 100644 --- a/rpcs3/Loader/ELF32.cpp +++ b/rpcs3/Loader/ELF32.cpp @@ -36,7 +36,7 @@ namespace loader return broken_file; } - LOG_ERROR(LOADER, "m_ehdr.e_type = 0x%x", (u16)(m_ehdr.is_le() ? m_ehdr.data_le.e_type : m_ehdr.data_be.e_type)); + LOG_WARNING(LOADER, "m_ehdr.e_type = 0x%x", (u16)(m_ehdr.is_le() ? m_ehdr.data_le.e_type : m_ehdr.data_be.e_type)); if (m_ehdr.data_le.e_phnum) { diff --git a/rpcs3/Loader/ELF64.cpp b/rpcs3/Loader/ELF64.cpp index 3c497e20f0..3eb33b8bc2 100644 --- a/rpcs3/Loader/ELF64.cpp +++ b/rpcs3/Loader/ELF64.cpp @@ -43,7 +43,7 @@ namespace loader return broken_file; } - LOG_ERROR(LOADER, "m_ehdr.e_type = 0x%x", m_ehdr.e_type.ToLE()); + LOG_WARNING(LOADER, "m_ehdr.e_type = 0x%x", m_ehdr.e_type.ToLE()); if (m_ehdr.e_machine != MACHINE_PPC64 && m_ehdr.e_machine != MACHINE_SPU) { @@ -94,7 +94,7 @@ namespace loader { for (auto &phdr : m_phdrs) { - switch (phdr.p_type) + switch (phdr.p_type.ToLE()) { case 0x1: //load if (phdr.p_memsz) @@ -366,7 +366,7 @@ namespace loader { for (auto &phdr : m_phdrs) { - switch (phdr.p_type) + switch (phdr.p_type.ToLE()) { case 0x00000001: //LOAD if (phdr.p_memsz) @@ -448,8 +448,8 @@ namespace loader static const u32 tbl_section_size = 2 * 4; static const u32 dst_section_size = 3 * 4; - auto& tbl = ptr::make(alloc(stub->s_imports * tbl_section_size)); - auto& dst = ptr::make(alloc(stub->s_imports * dst_section_size)); + auto tbl = ptr::make(alloc(stub->s_imports * tbl_section_size)); + auto dst = ptr::make(alloc(stub->s_imports * dst_section_size)); for (u32 i = 0; i < stub->s_imports; ++i) {