From e8582c86550ff6b6376afdc19d5498aa3291a05f Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Thu, 20 Nov 2014 00:42:07 +0530 Subject: [PATCH 01/13] Improved the logic that detects which functions should be recompiled --- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 106 ++++++++++----------------- rpcs3/Emu/Cell/PPULLVMRecompiler.h | 70 ++++-------------- 2 files changed, 52 insertions(+), 124 deletions(-) diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index aad5ca6d0f..c7b1a5663b 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -4923,16 +4923,14 @@ raw_fd_ostream & RecompilationEngine::Log() { } void RecompilationEngine::Task() { - bool work_done_this_iteration = false; - bool work_done_last_iteration = false; + bool is_idling = false; std::chrono::nanoseconds idling_time(0); std::chrono::nanoseconds recompiling_time(0); auto start = std::chrono::high_resolution_clock::now(); while (!TestDestroy() && !Emu.IsStopped()) { - work_done_last_iteration = work_done_this_iteration; - work_done_this_iteration = false; - ExecutionTrace * execution_trace = nullptr; + bool work_done_this_iteration = false; + ExecutionTrace * execution_trace = nullptr; { std::lock_guard lock(m_pending_execution_traces_lock); @@ -4952,28 +4950,29 @@ void RecompilationEngine::Task() { if (!work_done_this_iteration) { // TODO: Reduce the priority of the recompilation engine thread if its set to high priority + } else { + is_idling = false; } - if (!work_done_this_iteration && !work_done_last_iteration) { + if (is_idling) { auto recompiling_start = std::chrono::high_resolution_clock::now(); - // Recompile the function with the most number of compiled fragments - auto candidate = m_function_table.end(); - for (auto function_i = m_function_table.begin(); function_i != m_function_table.end(); function_i++) { - if ((*function_i)->num_compiled_fragments && (*function_i)->blocks.front()->IsFunction() && (*function_i)->blocks.front()->is_compiled) { - if (candidate != m_function_table.end()) { - if ((*function_i)->num_compiled_fragments > (*candidate)->num_compiled_fragments) { - candidate = function_i; - } - } else { - candidate = function_i; + // Recompile the function whose CFG has changed the most since the last time it was compiled + auto candidate = (BlockEntry *)nullptr; + size_t max_diff = 0; + for (auto block : m_block_table) { + if (block->IsFunction() && block->is_compiled) { + auto diff = block->cfg.GetSize() - block->last_compiled_cfg_size; + if (diff > max_diff) { + candidate = block; + max_diff = diff; } } } - if (candidate != m_function_table.end()) { - Log() << "Recompiling: " << (*candidate)->ToString() << "\n"; - CompileBlock(*(*candidate), *((*candidate)->blocks.front())); + if (candidate != nullptr) { + Log() << "Recompiling: " << candidate->ToString() << "\n"; + CompileBlock(*candidate); work_done_this_iteration = true; } @@ -4982,6 +4981,8 @@ void RecompilationEngine::Task() { } if (!work_done_this_iteration) { + is_idling = true; + // Wait a few ms for something to happen auto idling_start = std::chrono::high_resolution_clock::now(); WaitForAnySignal(250); @@ -5013,19 +5014,23 @@ void RecompilationEngine::Task() { } void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution_trace) { - auto function_i = m_function_table.end(); - auto execution_trace_id = execution_trace.GetId(); auto processed_execution_trace_i = m_processed_execution_traces.find(execution_trace_id); if (processed_execution_trace_i == m_processed_execution_traces.end()) { #ifdef _DEBUG Log() << "Trace: " << execution_trace.ToString() << "\n"; #endif + // Find the function block + BlockEntry key(execution_trace.function_address, execution_trace.function_address); + auto block_i = m_block_table.find(&key); + if (block_i == m_block_table.end()) { + block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.cfg.start_address, key.cfg.function_address)); + } + auto function_block = *block_i; + block_i = m_block_table.end(); + auto split_trace = false; std::vector tmp_block_list; - - auto split_trace = false; - auto block_i = m_block_table.end(); for (auto trace_i = execution_trace.entries.begin(); trace_i != execution_trace.entries.end(); trace_i++) { if (trace_i->type == ExecutionTraceEntry::Type::CompiledBlock) { block_i = m_block_table.end(); @@ -5034,21 +5039,9 @@ void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution if (block_i == m_block_table.end()) { BlockEntry key(trace_i->GetPrimaryAddress(), execution_trace.function_address); - block_i = m_block_table.find(&key); if (block_i == m_block_table.end()) { block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.cfg.start_address, key.cfg.function_address)); - - if (function_i == m_function_table.end()) { - FunctionEntry key(execution_trace.function_address); - function_i = m_function_table.find(&key); - if (function_i == m_function_table.end()) { - function_i = m_function_table.insert(m_function_table.end(), new FunctionEntry(key.address)); - } - } - - // Update the function table - (*function_i)->AddBlock(*block_i); } tmp_block_list.push_back(*block_i); @@ -5062,6 +5055,9 @@ void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution } UpdateControlFlowGraph((*block_i)->cfg, *trace_i, next_trace); + if (*block_i != function_block) { + UpdateControlFlowGraph(function_block->cfg, *trace_i, next_trace); + } } processed_execution_trace_i = m_processed_execution_traces.insert(m_processed_execution_traces.end(), std::make_pair(execution_trace_id, std::move(tmp_block_list))); @@ -5071,13 +5067,7 @@ void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution if (!(*i)->is_compiled) { (*i)->num_hits++; if ((*i)->num_hits >= 1000) { // TODO: Make this configurable - if (function_i == m_function_table.end()) { - FunctionEntry key(execution_trace.function_address); - function_i = m_function_table.find(&key); - } - - CompileBlock(*(*function_i), *(*i)); - (*i)->is_compiled = true; + CompileBlock(*(*i)); } } } @@ -5109,38 +5099,18 @@ void RecompilationEngine::UpdateControlFlowGraph(ControlFlowGraph & cfg, const E } } -void RecompilationEngine::CompileBlock(FunctionEntry & function_entry, BlockEntry & block_entry) { +void RecompilationEngine::CompileBlock(BlockEntry & block_entry) { #ifdef _DEBUG Log() << "Compile: " << block_entry.ToString() << "\n"; -#endif - - ControlFlowGraph temp_cfg(block_entry.cfg.start_address, block_entry.cfg.function_address); - ControlFlowGraph * cfg; - if (block_entry.IsFunction()) { - // Form a CFG by merging all the blocks in this function - for (auto block_i = function_entry.blocks.begin(); block_i != function_entry.blocks.end(); block_i++) { - temp_cfg += (*block_i)->cfg; - } - - cfg = &temp_cfg; - } else { - cfg = &block_entry.cfg; - } - -#ifdef _DEBUG Log() << "CFG: " << cfg->ToString() << "\n"; #endif auto ordinal = AllocateOrdinal(block_entry.cfg.start_address, block_entry.IsFunction()); - auto executable = m_compiler.Compile(fmt::Format("fn_0x%08X_%u", block_entry.cfg.start_address, block_entry.revision++), *cfg, true, + auto executable = m_compiler.Compile(fmt::Format("fn_0x%08X_%u", block_entry.cfg.start_address, block_entry.revision++), block_entry.cfg, true, block_entry.IsFunction() ? true : false /*generate_linkable_exits*/); - m_executable_lookup[ordinal] = executable; - - if (block_entry.IsFunction()) { - function_entry.num_compiled_fragments = 0; - } else { - function_entry.num_compiled_fragments++; - } + m_executable_lookup[ordinal] = executable; + block_entry.last_compiled_cfg_size = block_entry.cfg.GetSize(); + block_entry.is_compiled = true; } std::shared_ptr RecompilationEngine::GetInstance() { diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h index 0da0c1835e..3731b345ea 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.h +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h @@ -214,7 +214,7 @@ namespace ppu_recompiler_llvm { } std::string ToString() const { - auto s = fmt::Format("0x%08X (0x%08X):", start_address, function_address); + auto s = fmt::Format("0x%08X (0x%08X): Size=%u ->", start_address, function_address, GetSize()); for (auto i = instruction_addresses.begin(); i != instruction_addresses.end(); i++) { s += fmt::Format(" 0x%08X", *i); } @@ -237,6 +237,12 @@ namespace ppu_recompiler_llvm { return s; } + + /// Get the size of the CFG. The size is a score of how large the CFG is and increases everytime + /// a node or an edge is added to the CFG. + size_t GetSize() const { + return instruction_addresses.size() + branches.size() + calls.size(); + } }; enum class BranchType { @@ -970,6 +976,9 @@ namespace ppu_recompiler_llvm { /// The current revision number of this function u32 revision; + /// Size of the CFG when it was last compiled + size_t last_compiled_cfg_size; + /// The CFG for this block ControlFlowGraph cfg; @@ -979,13 +988,14 @@ namespace ppu_recompiler_llvm { BlockEntry(u32 start_address, u32 function_address) : num_hits(0) , revision(0) + , last_compiled_cfg_size(0) , is_compiled(false) , cfg(start_address, function_address) { } std::string ToString() const { - return fmt::Format("0x%08X (0x%08X): NumHits=%u, Revision=%u, IsCompiled=%c", - cfg.start_address, cfg.function_address, num_hits, revision, is_compiled ? 'Y' : 'N'); + return fmt::Format("0x%08X (0x%08X): NumHits=%u, Revision=%u, LastCompiledCfgSize=%u, IsCompiled=%c", + cfg.start_address, cfg.function_address, num_hits, revision, last_compiled_cfg_size, is_compiled ? 'Y' : 'N'); } bool operator == (const BlockEntry & other) const { @@ -1009,55 +1019,6 @@ namespace ppu_recompiler_llvm { }; }; - /// An entry in the function table - struct FunctionEntry { - /// Address of the function - u32 address; - - /// Number of compiled fragments - u32 num_compiled_fragments; - - /// Blocks in the function - std::list blocks; - - FunctionEntry(u32 address) - : address(address) - , num_compiled_fragments(0) { - } - - void AddBlock(BlockEntry * block_entry) { - auto i = std::find(blocks.begin(), blocks.end(), block_entry); - if (i == blocks.end()) { - if (block_entry->IsFunction()) { - // The first block must be the starting block of the function - blocks.push_front(block_entry); - } else { - blocks.push_back(block_entry); - } - } - } - - std::string ToString() const { - return fmt::Format("0x%08X: NumCompiledFragments=%u, NumBlocks=%u", address, num_compiled_fragments, blocks.size()); - } - - bool operator == (const FunctionEntry & other) const { - return address == other.address; - } - - struct hash { - size_t operator()(const FunctionEntry * f) const { - return f->address; - } - }; - - struct equal_to { - bool operator()(const FunctionEntry * lhs, const FunctionEntry * rhs) const { - return *lhs == *rhs; - } - }; - }; - /// Lock for accessing m_pending_execution_traces. TODO: Eliminate this and use a lock-free queue. std::mutex m_pending_execution_traces_lock; @@ -1067,9 +1028,6 @@ namespace ppu_recompiler_llvm { /// Block table std::unordered_set m_block_table; - /// Function table - std::unordered_set m_function_table; - /// Execution traces that have been already encountered. Data is the list of all blocks that this trace includes. std::unordered_map> m_processed_execution_traces; @@ -1107,7 +1065,7 @@ namespace ppu_recompiler_llvm { void UpdateControlFlowGraph(ControlFlowGraph & cfg, const ExecutionTraceEntry & this_entry, const ExecutionTraceEntry * next_entry); /// Compile a block - void CompileBlock(FunctionEntry & function_entry, BlockEntry & block_entry); + void CompileBlock(BlockEntry & block_entry); /// Mutex used to prevent multiple creation static std::mutex s_mutex; From 13acb06d1a9aad5b92bd77d6008dffbbddea30b3 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Thu, 20 Nov 2014 01:03:51 +0530 Subject: [PATCH 02/13] Implemented ANDC and also added support for TB and TBH in MFSPR --- rpcs3/Emu/Cell/PPUInterpreter.h | 4 +++- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 18 +++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 104418d7bd..d8db9fee2e 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -147,7 +147,8 @@ private: case 0x008: return CPU.LR; case 0x009: return CPU.CTR; case 0x100: return CPU.USPRG0; - case 0x10C: return get_time(); + case 0x10C: CPU.TB = get_time(); return CPU.TB; + case 0x10D: CPU.TB = get_time(); return CPU.TBH; } UNK(fmt::Format("ReadSPR error: Unknown SPR 0x%x!", n)); @@ -165,6 +166,7 @@ private: case 0x009: CPU.CTR = value; return; case 0x100: CPU.USPRG0 = value; return; case 0x10C: UNK("WriteSPR: Write to time-based SPR. Report this to a developer!"); return; + case 0x10D: UNK("WriteSPR: Write to time-based SPR upper. Report this to a developer!"); return; } UNK(fmt::Format("WriteSPR error: Unknown SPR 0x%x!", n)); diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index c7b1a5663b..9828b6a0ce 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -2325,7 +2325,16 @@ void Compiler::CNTLZD(u32 ra, u32 rs, bool rc) { } void Compiler::ANDC(u32 ra, u32 rs, u32 rb, bool rc) { - InterpreterCall("ANDC", &PPUInterpreter::ANDC, ra, rs, rb, rc); + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + rb_i64 = m_ir_builder->CreateNot(rb_i64); + auto res_i64 = m_ir_builder->CreateAnd(rs_i64, rb_i64); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } + //InterpreterCall("ANDC", &PPUInterpreter::ANDC, ra, rs, rb, rc); } void Compiler::TD(u32 to, u32 ra, u32 rb) { @@ -2784,6 +2793,13 @@ void Compiler::MFSPR(u32 rd, u32 spr) { case 0x100: rd_i64 = GetUsprg0(); break; + case 0x10C: + rd_i64 = Call("get_time", get_time); + break; + case 0x10D: + rd_i64 = Call("get_time", get_time); + rd_i64 = m_ir_builder->CreateLShr(rd_i64, 32); + break; default: assert(0); break; From 66610186915085c0b9874a76f8c5ee46c5a0620d Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Thu, 20 Nov 2014 23:38:10 +0530 Subject: [PATCH 03/13] Fixed the unit test framework for the PPU LLVM recompiler. Added tests for ANDC. Fixed an issues that prevented LMW/STMW from being generated properly. --- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 22 +++++--- rpcs3/Emu/Cell/PPULLVMRecompiler.h | 2 +- rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp | 61 +++++++++++------------ rpcs3/emucore.vcxproj | 2 +- 4 files changed, 47 insertions(+), 40 deletions(-) diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 9828b6a0ce..3ab2ab6bd5 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -82,6 +82,8 @@ Compiler::Compiler(RecompilationEngine & recompilation_engine, const Executable InitRotateMask(); s_rotate_mask_inited = true; } + + RunAllTests(); } Compiler::~Compiler() { @@ -4727,10 +4729,13 @@ Value * Compiler::ReadMemory(Value * addr_i64, u32 bits, u32 alignment, bool bsw return val_ix; } else { + static u32 next_basic_block_id = 0; + + next_basic_block_id++; auto cmp_i1 = m_ir_builder->CreateICmpULT(addr_i64, m_ir_builder->getInt64(RAW_SPU_BASE_ADDR)); - auto then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "then"); - auto else_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "else"); - auto merge_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "merge"); + auto then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, fmt::Format("then_%u", next_basic_block_id)); + auto else_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, fmt::Format("else_%u", next_basic_block_id)); + auto merge_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, fmt::Format("merge_%u", next_basic_block_id)); m_ir_builder->CreateCondBr(cmp_i1, then_bb, else_bb); m_ir_builder->SetInsertPoint(then_bb); @@ -4769,10 +4774,13 @@ void Compiler::WriteMemory(Value * addr_i64, Value * val_ix, u32 alignment, bool auto eaddr_ix_ptr = m_ir_builder->CreateIntToPtr(eaddr_i64, val_ix->getType()->getPointerTo()); m_ir_builder->CreateAlignedStore(val_ix, eaddr_ix_ptr, alignment); } else { + static u32 next_basic_block_id; + + next_basic_block_id++; auto cmp_i1 = m_ir_builder->CreateICmpULT(addr_i64, m_ir_builder->getInt64(RAW_SPU_BASE_ADDR)); - auto then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "then"); - auto else_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "else"); - auto merge_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "merge"); + auto then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, fmt::Format("then_%u", next_basic_block_id)); + auto else_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, fmt::Format("else_%u", next_basic_block_id)); + auto merge_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, fmt::Format("merge_%u", next_basic_block_id)); m_ir_builder->CreateCondBr(cmp_i1, then_bb, else_bb); m_ir_builder->SetInsertPoint(then_bb); @@ -5118,7 +5126,7 @@ void RecompilationEngine::UpdateControlFlowGraph(ControlFlowGraph & cfg, const E void RecompilationEngine::CompileBlock(BlockEntry & block_entry) { #ifdef _DEBUG Log() << "Compile: " << block_entry.ToString() << "\n"; - Log() << "CFG: " << cfg->ToString() << "\n"; + Log() << "CFG: " << block_entry.cfg.ToString() << "\n"; #endif auto ordinal = AllocateOrdinal(block_entry.cfg.start_address, block_entry.IsFunction()); diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h index 3731b345ea..fe1371f6d3 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.h +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h @@ -295,7 +295,7 @@ namespace ppu_recompiler_llvm { Stats GetStats(); /// Execute all tests - void RunAllTests(PPUThread * ppu_state, PPUInterpreter * interpreter); + void RunAllTests(); protected: void Decode(const u32 code) override; diff --git a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp index 1a1ee9c68a..48e9f27641 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp @@ -222,8 +222,8 @@ void Compiler::VerifyInstructionAgainstInterpreter(const char * name, CompilerFn if (interp_output_state.ToString() != recomp_output_state.ToString()) { msg = std::string("Input state:\n") + input_state.ToString() + - std::string("\nOutput state:\n") + recomp_output_state.ToString() + - std::string("\nInterpreter output state:\n") + interp_output_state.ToString(); + std::string("\nOutput state:\n") + recomp_output_state.ToString() + + std::string("\nInterpreter output state:\n") + interp_output_state.ToString(); return false; } @@ -235,48 +235,49 @@ void Compiler::VerifyInstructionAgainstInterpreter(const char * name, CompilerFn void Compiler::RunTest(const char * name, std::function test_case, std::function input, std::function check_result) { #ifdef PPU_LLVM_RECOMPILER_UNIT_TESTS - // Create the unit test function - m_current_function = (Function *)m_module->getOrInsertFunction(name, m_ir_builder->getVoidTy(), - m_ir_builder->getInt8PtrTy() /*ppu_state*/, - m_ir_builder->getInt64Ty() /*base_addres*/, - m_ir_builder->getInt8PtrTy() /*interpreter*/, nullptr); - m_current_function->setCallingConv(CallingConv::X86_64_Win64); - auto arg_i = m_current_function->arg_begin(); + // Create the function + m_state.function = (Function *)m_module->getOrInsertFunction(name, m_compiled_function_type); + m_state.function->setCallingConv(CallingConv::X86_64_Win64); + auto arg_i = m_state.function->arg_begin(); arg_i->setName("ppu_state"); - (++arg_i)->setName("base_address"); + m_state.args[CompileTaskState::Args::State] = arg_i; (++arg_i)->setName("interpreter"); + m_state.args[CompileTaskState::Args::Interpreter] = arg_i; + (++arg_i)->setName("context"); + m_state.args[CompileTaskState::Args::Context] = arg_i; + m_state.current_instruction_address = s_ppu_state->PC; - auto block = BasicBlock::Create(*m_llvm_context, "start", m_current_function); + auto block = BasicBlock::Create(*m_llvm_context, "start", m_state.function); m_ir_builder->SetInsertPoint(block); test_case(); - m_ir_builder->CreateRetVoid(); + m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); // Print the IR std::string ir; raw_string_ostream ir_ostream(ir); - m_current_function->print(ir_ostream); + m_state.function->print(ir_ostream); LOG_NOTICE(PPU, "[UT %s] LLVM IR:%s", name, ir.c_str()); std::string verify; raw_string_ostream verify_ostream(verify); - if (verifyFunction(*m_current_function, &verify_ostream)) { + if (verifyFunction(*m_state.function, &verify_ostream)) { LOG_ERROR(PPU, "[UT %s] Verification Failed:%s", name, verify.c_str()); return; } // Optimize - m_fpm->run(*m_current_function); + m_fpm->run(*m_state.function); // Print the optimized IR ir = ""; - m_current_function->print(ir_ostream); + m_state.function->print(ir_ostream); LOG_NOTICE(PPU, "[UT %s] Optimized LLVM IR:%s", name, ir.c_str()); // Generate the function MachineCodeInfo mci; - m_execution_engine->runJITOnFunction(m_current_function, &mci); + m_execution_engine->runJITOnFunction(m_state.function, &mci); // Disassemble the generated function auto disassembler = LLVMCreateDisasm(sys::getProcessTriple().c_str(), nullptr, 0, nullptr, nullptr); @@ -294,10 +295,8 @@ void Compiler::RunTest(const char * name, std::function test_case, std:: // Run the test input(); - std::vector args; - args.push_back(GenericValue(s_ppu_state)); - args.push_back(GenericValue(s_interpreter)); - m_execution_engine->runFunction(m_current_function, args); + auto executable = (Executable)m_execution_engine->getPointerToFunction(m_state.function); + executable(s_ppu_state, s_interpreter, 0); // Verify results std::string msg; @@ -308,17 +307,17 @@ void Compiler::RunTest(const char * name, std::function test_case, std:: LOG_ERROR(PPU, "[UT %s] Test failed. %s", name, msg.c_str()); } - m_execution_engine->freeMachineCodeForFunction(m_current_function); + m_execution_engine->freeMachineCodeForFunction(m_state.function); #endif // PPU_LLVM_RECOMPILER_UNIT_TESTS } -void Compiler::RunAllTests(PPUThread * ppu_state, PPUInterpreter * interpreter) { +void Compiler::RunAllTests() { #ifdef PPU_LLVM_RECOMPILER_UNIT_TESTS - s_ppu_state = ppu_state; - s_interpreter = interpreter; + PPUThread ppu_state; + PPUInterpreter interpreter(ppu_state); - PPUState initial_state; - initial_state.Load(*ppu_state, 0x10000); + s_ppu_state = &ppu_state; + s_interpreter = &interpreter; LOG_NOTICE(PPU, "Running Unit Tests"); @@ -595,12 +594,14 @@ void Compiler::RunAllTests(PPUThread * ppu_state, PPUInterpreter * interpreter) VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CNTLZW, 5, 5, 5, 6, 1); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CNTLZD, 0, 5, 5, 6, 0); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CNTLZD, 5, 5, 5, 6, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ANDC, 0, 5, 5, 6, 7, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ANDC, 5, 5, 5, 6, 7, 1); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ISYNC, 0, 5); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EIEIO, 0, 5); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSQRT, 0, 5, 0, 1, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSQRTS, 0, 5, 0, 1, false); - + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FDIV, 0, 5, 0, 1, 2, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSUB, 0, 5, 0, 1, 2, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FADD, 0, 5, 0, 1, 2, false); @@ -616,7 +617,7 @@ void Compiler::RunAllTests(PPUThread * ppu_state, PPUInterpreter * interpreter) VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCFID, 0, 5, 0, 1, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCTID, 0, 5, 0, 1, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCTIW, 0, 5, 0, 1, false); - + PPUState input; input.SetRandom(0x10000); input.GPR[14] = 10; @@ -766,7 +767,5 @@ void Compiler::RunAllTests(PPUThread * ppu_state, PPUInterpreter * interpreter) VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STSWI, 3, input, 5, 23, 25); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(DCBZ, 0, input, 0, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(DCBZ, 1, input, 14, 23); - - initial_state.Store(*ppu_state); #endif // PPU_LLVM_RECOMPILER_UNIT_TESTS } diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 2da63c0f47..913cfa065f 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -771,7 +771,7 @@ ..\llvm_build\Release\lib - LLVMJIT.lib;LLVMVectorize.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMExecutionEngine.lib;LLVMAsmPrinter.lib;LLVMSelectionDAG.lib;LLVMCodeGen.lib;LLVMScalarOpts.lib;LLVMInstCombine.lib;LLVMTransformUtils.lib;LLVMipa.lib;LLVMAnalysis.lib;LLVMTarget.lib;LLVMX86Desc.lib;LLVMX86AsmPrinter.lib;LLVMObject.lib;LLVMMCParser.lib;LLVMBitReader.lib;LLVMCore.lib;LLVMX86Utils.lib;LLVMMC.lib;LLVMX86Info.lib;LLVMSupport.lib + LLVMJIT.lib;LLVMVectorize.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMExecutionEngine.lib;LLVMAsmPrinter.lib;LLVMSelectionDAG.lib;LLVMCodeGen.lib;LLVMScalarOpts.lib;LLVMInstCombine.lib;LLVMTransformUtils.lib;LLVMipa.lib;LLVMAnalysis.lib;LLVMTarget.lib;LLVMX86Desc.lib;LLVMX86AsmPrinter.lib;LLVMObject.lib;LLVMMCParser.lib;LLVMBitReader.lib;LLVMCore.lib;LLVMX86Utils.lib;LLVMMC.lib;LLVMX86Info.lib;LLVMSupport.lib;LLVMMCDisassembler.lib From 5addbcbbb0079eb96ee6ffde4d2b5345cbb7c6e9 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Fri, 21 Nov 2014 00:04:59 +0530 Subject: [PATCH 04/13] Direct UT output to PPULLVMRecompiler.log --- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 12 ++++++----- rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp | 26 +++++++++++++---------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 3ab2ab6bd5..7b0b11f217 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -82,8 +82,6 @@ Compiler::Compiler(RecompilationEngine & recompilation_engine, const Executable InitRotateMask(); s_rotate_mask_inited = true; } - - RunAllTests(); } Compiler::~Compiler() { @@ -4884,9 +4882,7 @@ RecompilationEngine::RecompilationEngine() : ThreadBase("PPU Recompilation Engine") , m_next_ordinal(0) , m_compiler(*this, ExecutionEngine::ExecuteFunction, ExecutionEngine::ExecuteTillReturn) { - std::string error; - m_log = new raw_fd_ostream("PPULLVMRecompiler.log", error, sys::fs::F_Text); - m_log->SetUnbuffered(); + m_compiler.RunAllTests(); } RecompilationEngine::~RecompilationEngine() { @@ -4943,6 +4939,12 @@ void RecompilationEngine::NotifyTrace(ExecutionTrace * execution_trace) { } raw_fd_ostream & RecompilationEngine::Log() { + if (!m_log) { + std::string error; + m_log = new raw_fd_ostream("PPULLVMRecompiler.log", error, sys::fs::F_Text); + m_log->SetUnbuffered(); + } + return *m_log; } diff --git a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp index 48e9f27641..bfea16c180 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp @@ -235,6 +235,8 @@ void Compiler::VerifyInstructionAgainstInterpreter(const char * name, CompilerFn void Compiler::RunTest(const char * name, std::function test_case, std::function input, std::function check_result) { #ifdef PPU_LLVM_RECOMPILER_UNIT_TESTS + m_recompilation_engine.Log() << "Running test " << name << '\n'; + // Create the function m_state.function = (Function *)m_module->getOrInsertFunction(name, m_compiled_function_type); m_state.function->setCallingConv(CallingConv::X86_64_Win64); @@ -258,12 +260,12 @@ void Compiler::RunTest(const char * name, std::function test_case, std:: std::string ir; raw_string_ostream ir_ostream(ir); m_state.function->print(ir_ostream); - LOG_NOTICE(PPU, "[UT %s] LLVM IR:%s", name, ir.c_str()); + m_recompilation_engine.Log() << "LLVM IR:" << ir; - std::string verify; - raw_string_ostream verify_ostream(verify); - if (verifyFunction(*m_state.function, &verify_ostream)) { - LOG_ERROR(PPU, "[UT %s] Verification Failed:%s", name, verify.c_str()); + std::string verify_results; + raw_string_ostream verify_results_ostream(verify_results); + if (verifyFunction(*m_state.function, &verify_results_ostream)) { + m_recompilation_engine.Log() << "Verification Failed:" << verify_results; return; } @@ -273,7 +275,7 @@ void Compiler::RunTest(const char * name, std::function test_case, std:: // Print the optimized IR ir = ""; m_state.function->print(ir_ostream); - LOG_NOTICE(PPU, "[UT %s] Optimized LLVM IR:%s", name, ir.c_str()); + m_recompilation_engine.Log() << "Optimized LLVM IR:" << ir; // Generate the function MachineCodeInfo mci; @@ -282,12 +284,12 @@ void Compiler::RunTest(const char * name, std::function test_case, std:: // Disassemble the generated function auto disassembler = LLVMCreateDisasm(sys::getProcessTriple().c_str(), nullptr, 0, nullptr, nullptr); - LOG_NOTICE(PPU, "[UT %s] Disassembly:", name); + m_recompilation_engine.Log() << "Disassembly:\n"; for (uint64_t pc = 0; pc < mci.size();) { char str[1024]; auto size = LLVMDisasmInstruction(disassembler, (uint8_t *)mci.address() + pc, mci.size() - pc, (uint64_t)((uint8_t *)mci.address() + pc), str, sizeof(str)); - LOG_NOTICE(PPU, "[UT %s] %p: %s.", name, (uint8_t *)mci.address() + pc, str); + m_recompilation_engine.Log() << ((uint8_t *)mci.address() + pc) << ':' << str << '\n'; pc += size; } @@ -302,9 +304,9 @@ void Compiler::RunTest(const char * name, std::function test_case, std:: std::string msg; bool pass = check_result(msg); if (pass) { - LOG_NOTICE(PPU, "[UT %s] Test passed. %s", name, msg.c_str()); + m_recompilation_engine.Log() << "Test " << name << " passed\n" << msg << "\n"; } else { - LOG_ERROR(PPU, "[UT %s] Test failed. %s", name, msg.c_str()); + m_recompilation_engine.Log() << "Test " << name << " failed\n" << msg << "\n"; } m_execution_engine->freeMachineCodeForFunction(m_state.function); @@ -319,7 +321,7 @@ void Compiler::RunAllTests() { s_ppu_state = &ppu_state; s_interpreter = &interpreter; - LOG_NOTICE(PPU, "Running Unit Tests"); + m_recompilation_engine.Log() << "Starting Unit Tests\n"; VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFVSCR, 0, 5, 1); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTVSCR, 0, 5, 1); @@ -767,5 +769,7 @@ void Compiler::RunAllTests() { VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STSWI, 3, input, 5, 23, 25); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(DCBZ, 0, input, 0, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(DCBZ, 1, input, 14, 23); + + m_recompilation_engine.Log() << "Finished Unit Tests\n"; #endif // PPU_LLVM_RECOMPILER_UNIT_TESTS } From 921ee1464b7b4529bc37bb92bdc2c00062ee95a8 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sun, 23 Nov 2014 19:06:20 +0530 Subject: [PATCH 05/13] Implemented some vector instructions --- rpcs3/Emu/Cell/PPUInterpreter.h | 26 +++- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 148 +++++++++++++++++++--- rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp | 29 ++++- 3 files changed, 175 insertions(+), 28 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index d8db9fee2e..04fad178c6 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -807,9 +807,15 @@ private: float result = CPU.VPR[vb]._f[w] * nScale; if (result > 0x7fffffff) + { CPU.VPR[vd]._s32[w] = (int)0x7fffffff; + CPU.VSCR.SAT = 1; + } else if (result < -pow(2, 31)) + { CPU.VPR[vd]._s32[w] = (int)0x80000000; + CPU.VSCR.SAT = 1; + } else // C rounding = Round towards 0 CPU.VPR[vd]._s32[w] = (int)result; } @@ -821,24 +827,30 @@ private: for (uint w = 0; w < 4; w++) { // C rounding = Round towards 0 - s64 result = (s64)(CPU.VPR[vb]._f[w] * nScale); + float result = CPU.VPR[vb]._f[w] * nScale; if (result > 0xffffffffu) + { CPU.VPR[vd]._u32[w] = 0xffffffffu; + CPU.VSCR.SAT = 1; + } else if (result < 0) + { CPU.VPR[vd]._u32[w] = 0; + CPU.VSCR.SAT = 1; + } else CPU.VPR[vd]._u32[w] = (u32)result; } } void VEXPTEFP(u32 vd, u32 vb) { - // vd = exp(vb * log(2)) + // vd = 2^x // ISA : Note that the value placed into the element of vD may vary between implementations // and between different executions on the same implementation. for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._f[w] = exp(CPU.VPR[vb]._f[w] * log(2.0f)); + CPU.VPR[vd]._f[w] = powf(2.0f, CPU.VPR[vb]._f[w]); } } void VLOGEFP(u32 vd, u32 vb) @@ -847,7 +859,7 @@ private: // and between different executions on the same implementation. for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._f[w] = log(CPU.VPR[vb]._f[w]) / log(2.0f); + CPU.VPR[vd]._f[w] = log2(CPU.VPR[vb]._f[w]); } } void VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) @@ -906,7 +918,8 @@ private: { for (uint h = 0; h < 8; h++) { - s32 result = (s32)CPU.VPR[va]._s16[h] * (s32)CPU.VPR[vb]._s16[h] + (s32)CPU.VPR[vc]._s16[h]; + s32 result = (s32)CPU.VPR[va]._s16[h] * (s32)CPU.VPR[vb]._s16[h]; + result = (result >> 15) + (s32)CPU.VPR[vc]._s16[h]; if (result > INT16_MAX) { @@ -926,7 +939,8 @@ private: { for (uint h = 0; h < 8; h++) { - s32 result = (s32)CPU.VPR[va]._s16[h] * (s32)CPU.VPR[vb]._s16[h] + (s32)CPU.VPR[vc]._s16[h] + 0x4000; + s32 result = ((s32)CPU.VPR[va]._s16[h] * (s32)CPU.VPR[vb]._s16[h]) + 0x4000; + result = (result >> 15) + (s32)CPU.VPR[vc]._s16[h]; if (result > INT16_MAX) { diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 7b0b11f217..0516c3c146 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -288,7 +288,7 @@ void Compiler::NULL_OP() { } void Compiler::NOP() { - InterpreterCall("NOP", &PPUInterpreter::NOP); + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); } void Compiler::TDI(u32 to, u32 ra, s32 simm16) { @@ -725,19 +725,47 @@ void Compiler::VCMPGTUW_(u32 vd, u32 va, u32 vb) { } void Compiler::VCTSXS(u32 vd, u32 uimm5, u32 vb) { - InterpreterCall("VCTSXS", &PPUInterpreter::VCTSXS, vd, uimm5, vb); + auto vb_v4f32 = GetVrAsFloatVec(vb); + if (uimm5) { + vb_v4f32 = m_ir_builder->CreateFMul(vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 1 << uimm5))); + } + + auto res_v4i32 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_cvtps2dq), vb_v4f32); + auto cmp_v4i1 = m_ir_builder->CreateFCmpOGE(vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 0x7FFFFFFF))); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + res_v4i32 = m_ir_builder->CreateXor(cmp_v4i32, res_v4i32); + SetVr(vd, res_v4i32); + + // TODO: Set VSCR.SAT } void Compiler::VCTUXS(u32 vd, u32 uimm5, u32 vb) { - InterpreterCall("VCTUXS", &PPUInterpreter::VCTUXS, vd, uimm5, vb); + auto vb_v4f32 = GetVrAsFloatVec(vb); + if (uimm5) { + vb_v4f32 = m_ir_builder->CreateFMul(vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 1 << uimm5))); + } + + auto res_v4f32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_max_ps), vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 0))); + auto cmp_v4i1 = m_ir_builder->CreateFCmpOGE(res_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 0xFFFFFFFFu))); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto res_v4i32 = m_ir_builder->CreateFPToUI(res_v4f32, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + res_v4i32 = m_ir_builder->CreateOr(res_v4i32, cmp_v4i32); + SetVr(vd, res_v4i32); + + // TODO: Set VSCR.SAT } void Compiler::VEXPTEFP(u32 vd, u32 vb) { - InterpreterCall("VEXPTEFP", &PPUInterpreter::VEXPTEFP, vd, vb); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::pow, VectorType::get(m_ir_builder->getFloatTy(), 4)), + m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 2.0f)), vb_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VLOGEFP(u32 vd, u32 vb) { - InterpreterCall("VLOGEFP", &PPUInterpreter::VLOGEFP, vd, vb); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::log2, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) { @@ -798,11 +826,46 @@ void Compiler::VMAXUW(u32 vd, u32 va, u32 vb) { } void Compiler::VMHADDSHS(u32 vd, u32 va, u32 vb, u32 vc) { - InterpreterCall("VMHADDSHS", &PPUInterpreter::VMHADDSHS, vd, va, vb, vc); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto vc_v8i16 = GetVrAsIntVec(vc, 16); + auto va_v8i32 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto vb_v8i32 = m_ir_builder->CreateSExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto vc_v8i32 = m_ir_builder->CreateSExt(vc_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto res_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32); + res_v8i32 = m_ir_builder->CreateAShr(res_v8i32, 15); + res_v8i32 = m_ir_builder->CreateAdd(res_v8i32, vc_v8i32); + + u32 mask1_v4i32[4] = {0, 1, 2, 3}; + auto res1_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + u32 mask2_v4i32[4] = {4, 5, 6, 7}; + auto res2_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packssdw_128), res1_v4i32, res2_v4i32); + SetVr(vd, res_v8i16); + + // TODO: Set VSCR.SAT } void Compiler::VMHRADDSHS(u32 vd, u32 va, u32 vb, u32 vc) { - InterpreterCall("VMHRADDSHS", &PPUInterpreter::VMHRADDSHS, vd, va, vb, vc); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto vc_v8i16 = GetVrAsIntVec(vc, 16); + auto va_v8i32 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto vb_v8i32 = m_ir_builder->CreateSExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto vc_v8i32 = m_ir_builder->CreateSExt(vc_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto res_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32); + res_v8i32 = m_ir_builder->CreateAdd(res_v8i32, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt32(0x4000))); + res_v8i32 = m_ir_builder->CreateAShr(res_v8i32, 15); + res_v8i32 = m_ir_builder->CreateAdd(res_v8i32, vc_v8i32); + + u32 mask1_v4i32[4] = {0, 1, 2, 3}; + auto res1_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + u32 mask2_v4i32[4] = {4, 5, 6, 7}; + auto res2_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packssdw_128), res1_v4i32, res2_v4i32); + SetVr(vd, res_v8i16); + + // TODO: Set VSCR.SAT } void Compiler::VMINFP(u32 vd, u32 va, u32 vb) { @@ -855,7 +918,12 @@ void Compiler::VMINUW(u32 vd, u32 va, u32 vb) { } void Compiler::VMLADDUHM(u32 vd, u32 va, u32 vb, u32 vc) { - InterpreterCall("VMLADDUHM", &PPUInterpreter::VMLADDUHM, vd, va, vb, vc); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto vc_v8i16 = GetVrAsIntVec(vc, 16); + auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); + res_v8i16 = m_ir_builder->CreateAdd(res_v8i16, vc_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VMRGHB(u32 vd, u32 va, u32 vb) { @@ -1010,35 +1078,83 @@ void Compiler::VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) { } void Compiler::VMULESB(u32 vd, u32 va, u32 vb) { - InterpreterCall("VMULESB", &PPUInterpreter::VMULESB, vd, va, vb); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + va_v8i16 = m_ir_builder->CreateAShr(va_v8i16, 8); + vb_v8i16 = m_ir_builder->CreateAShr(vb_v8i16, 8); + auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VMULESH(u32 vd, u32 va, u32 vb) { - InterpreterCall("VMULESH", &PPUInterpreter::VMULESH, vd, va, vb); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + va_v4i32 = m_ir_builder->CreateAShr(va_v4i32, 16); + vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, 16); + auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VMULEUB(u32 vd, u32 va, u32 vb) { - InterpreterCall("VMULEUB", &PPUInterpreter::VMULEUB, vd, va, vb); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + va_v8i16 = m_ir_builder->CreateLShr(va_v8i16, 8); + vb_v8i16 = m_ir_builder->CreateLShr(vb_v8i16, 8); + auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VMULEUH(u32 vd, u32 va, u32 vb) { - InterpreterCall("VMULEUH", &PPUInterpreter::VMULEUH, vd, va, vb); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + va_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 16); + vb_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, 16); + auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VMULOSB(u32 vd, u32 va, u32 vb) { - InterpreterCall("VMULOSB", &PPUInterpreter::VMULOSB, vd, va, vb); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + va_v8i16 = m_ir_builder->CreateShl(va_v8i16, 8); + va_v8i16 = m_ir_builder->CreateAShr(va_v8i16, 8); + vb_v8i16 = m_ir_builder->CreateShl(vb_v8i16, 8); + vb_v8i16 = m_ir_builder->CreateAShr(vb_v8i16, 8); + auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VMULOSH(u32 vd, u32 va, u32 vb) { - InterpreterCall("VMULOSH", &PPUInterpreter::VMULOSH, vd, va, vb); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + va_v4i32 = m_ir_builder->CreateShl(va_v4i32, 16); + va_v4i32 = m_ir_builder->CreateAShr(va_v4i32, 16); + vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, 16); + vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, 16); + auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VMULOUB(u32 vd, u32 va, u32 vb) { - InterpreterCall("VMULOUB", &PPUInterpreter::VMULOUB, vd, va, vb); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + va_v8i16 = m_ir_builder->CreateShl(va_v8i16, 8); + va_v8i16 = m_ir_builder->CreateLShr(va_v8i16, 8); + vb_v8i16 = m_ir_builder->CreateShl(vb_v8i16, 8); + vb_v8i16 = m_ir_builder->CreateLShr(vb_v8i16, 8); + auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VMULOUH(u32 vd, u32 va, u32 vb) { - InterpreterCall("VMULOUH", &PPUInterpreter::VMULOUH, vd, va, vb); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + va_v4i32 = m_ir_builder->CreateShl(va_v4i32, 16); + va_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 16); + vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, 16); + vb_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, 16); + auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VNMSUBFP(u32 vd, u32 va, u32 vc, u32 vb) { diff --git a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp index bfea16c180..c2e66cd333 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp @@ -8,7 +8,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/MC/MCDisassembler.h" -//#define PPU_LLVM_RECOMPILER_UNIT_TESTS 1 +#define PPU_LLVM_RECOMPILER_UNIT_TESTS 1 using namespace llvm; using namespace ppu_recompiler_llvm; @@ -63,7 +63,7 @@ VerifyInstructionAgainstInterpreter(fmt::Format("%s.%d", #fn, tc).c_str(), &Comp u64 R_ADDR; u64 R_VALUE; - /// Mmeory block + /// Memory block u32 address; u64 mem_block[64]; @@ -130,9 +130,9 @@ VerifyInstructionAgainstInterpreter(fmt::Format("%s.%d", #fn, tc).c_str(), &Comp FPR[i] = (double)rng(); GPR[i] = rng(); VPR[i]._f[0] = (float)rng(); - VPR[i]._f[1] = (float)rng(); - VPR[i]._f[2] = (float)rng(); - VPR[i]._f[3] = (float)rng(); + VPR[i]._f[1] = (float)(rng() & 0x7FFFFFFF); + VPR[i]._f[2] = -(float)(rng() & 0x7FFFFFFF); + VPR[i]._f[3] = -(float)rng(); if (i < 8) { SPRG[i] = rng(); @@ -265,7 +265,7 @@ void Compiler::RunTest(const char * name, std::function test_case, std:: std::string verify_results; raw_string_ostream verify_results_ostream(verify_results); if (verifyFunction(*m_state.function, &verify_results_ostream)) { - m_recompilation_engine.Log() << "Verification Failed:" << verify_results; + m_recompilation_engine.Log() << "Verification Failed:\n" << verify_results; return; } @@ -400,6 +400,12 @@ void Compiler::RunAllTests() { VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUW, 5, 5, 0, 1, 1); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUW_, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUW_, 5, 5, 0, 1, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCTSXS, 0, 5, 0, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCTSXS, 5, 5, 0, 3, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCTUXS, 0, 5, 0, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCTUXS, 5, 5, 0, 3, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VEXPTEFP, 0, 5, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VLOGEFP, 0, 5, 0, 1); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMADDFP, 0, 5, 0, 1, 2, 3); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXFP, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXSB, 0, 5, 0, 1, 2); @@ -408,6 +414,8 @@ void Compiler::RunAllTests() { VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXUB, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXUH, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXUW, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMHADDSHS, 0, 5, 0, 1, 2, 3); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMHRADDSHS, 0, 5, 0, 1, 2, 3); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINFP, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINSB, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINSH, 0, 5, 0, 1, 2); @@ -415,6 +423,7 @@ void Compiler::RunAllTests() { VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINUB, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINUH, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINUW, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMLADDUHM, 0, 5, 0, 1, 2, 3); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGHB, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGHH, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGHW, 0, 5, 0, 1, 2); @@ -426,6 +435,14 @@ void Compiler::RunAllTests() { VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUBM, 0, 5, 0, 1, 2, 3); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUHM, 0, 5, 0, 1, 2, 3); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VNMSUBFP, 0, 5, 0, 1, 2, 3); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULESB, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULESH, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULEUB, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULEUH, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOSB, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOSH, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOUB, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOUH, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VNOR, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VOR, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPERM, 0, 5, 0, 1, 2, 3); From 5934132b55ef5d676726760bb12eda015f4b78b6 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sun, 23 Nov 2014 20:47:00 +0530 Subject: [PATCH 06/13] Implemented VPK* --- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 68 ++++++++++++++++++++--- rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp | 8 +++ 2 files changed, 68 insertions(+), 8 deletions(-) diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 0516c3c146..8fb418d0bb 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -1208,35 +1208,87 @@ void Compiler::VPKPX(u32 vd, u32 va, u32 vb) { } void Compiler::VPKSHSS(u32 vd, u32 va, u32 vb) { - InterpreterCall("VPKSHSS", &PPUInterpreter::VPKSHSS, vd, va, vb); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packsswb_128), vb_v8i16, va_v8i16); + SetVr(vd, res_v16i8); + + // TODO: VSCR.SAT } void Compiler::VPKSHUS(u32 vd, u32 va, u32 vb) { - InterpreterCall("VPKSHUS", &PPUInterpreter::VPKSHUS, vd, va, vb); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packuswb_128), vb_v8i16, va_v8i16); + SetVr(vd, res_v16i8); + + // TODO: VSCR.SAT } void Compiler::VPKSWSS(u32 vd, u32 va, u32 vb) { - InterpreterCall("VPKSWSS", &PPUInterpreter::VPKSWSS, vd, va, vb); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packssdw_128), vb_v4i32, va_v4i32); + SetVr(vd, res_v8i16); + + // TODO: VSCR.SAT } void Compiler::VPKSWUS(u32 vd, u32 va, u32 vb) { - InterpreterCall("VPKSWUS", &PPUInterpreter::VPKSWUS, vd, va, vb); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_packusdw), vb_v4i32, va_v4i32); + SetVr(vd, res_v8i16); + + // TODO: VSCR.SAT } void Compiler::VPKUHUM(u32 vd, u32 va, u32 vb) { - InterpreterCall("VPKUHUM", &PPUInterpreter::VPKUHUM, vd, va, vb); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + + u32 mask_v16i32[16] = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}; + auto res_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, va_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); + SetVr(vd, res_v16i8); } void Compiler::VPKUHUS(u32 vd, u32 va, u32 vb) { - InterpreterCall("VPKUHUS", &PPUInterpreter::VPKUHUS, vd, va, vb); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + va_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminuw), va_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xFF))); + vb_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminuw), vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xFF))); + auto va_v16i8 = m_ir_builder->CreateBitCast(va_v8i16, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + auto vb_v16i8 = m_ir_builder->CreateBitCast(vb_v8i16, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + + u32 mask_v16i32[16] = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}; + auto res_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, va_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); + SetVr(vd, res_v16i8); + + // TODO: Set VSCR.SAT } void Compiler::VPKUWUM(u32 vd, u32 va, u32 vb) { - InterpreterCall("VPKUWUM", &PPUInterpreter::VPKUWUM, vd, va, vb); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + + u32 mask_v8i32[8] = {0, 2, 4, 6, 8, 10, 12, 14}; + auto res_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, va_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + SetVr(vd, res_v8i16); } void Compiler::VPKUWUS(u32 vd, u32 va, u32 vb) { - InterpreterCall("VPKUWUS", &PPUInterpreter::VPKUWUS, vd, va, vb); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + va_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminud), va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFFF))); + vb_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminud), vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFFF))); + auto va_v8i16 = m_ir_builder->CreateBitCast(va_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + auto vb_v8i16 = m_ir_builder->CreateBitCast(vb_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + + u32 mask_v8i32[8] = {0, 2, 4, 6, 8, 10, 12, 14}; + auto res_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, va_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + SetVr(vd, res_v8i16); + + // TODO: Set VSCR.SAT } void Compiler::VREFP(u32 vd, u32 vb) { diff --git a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp index c2e66cd333..cc87581965 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp @@ -446,6 +446,14 @@ void Compiler::RunAllTests() { VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VNOR, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VOR, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPERM, 0, 5, 0, 1, 2, 3); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSHSS, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSHUS, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSWSS, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSWUS, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKUHUM, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKUHUS, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKUWUM, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKUWUS, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VREFP, 0, 5, 0, 1); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSEL, 0, 5, 0, 1, 2, 3); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSL, 0, 5, 0, 1, 2); From d92344f3836fd9bc2aa0846f937d8b495b91577d Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Mon, 24 Nov 2014 01:41:25 +0530 Subject: [PATCH 07/13] Implement more instructions in the PPU LLVM recompiler --- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 228 +++++++++++++++++++--- rpcs3/Emu/Cell/PPULLVMRecompiler.h | 3 + rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp | 194 ++++++++++-------- 3 files changed, 317 insertions(+), 108 deletions(-) diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 8fb418d0bb..3ed5a6b5ee 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -284,7 +284,7 @@ void Compiler::Decode(const u32 code) { } void Compiler::NULL_OP() { - InterpreterCall("NULL_OP", &PPUInterpreter::NULL_OP); + CompilationError("NULL_OP"); } void Compiler::NOP() { @@ -292,11 +292,11 @@ void Compiler::NOP() { } void Compiler::TDI(u32 to, u32 ra, s32 simm16) { - InterpreterCall("TDI", &PPUInterpreter::TDI, to, ra, simm16); + CompilationError("TDI"); } void Compiler::TWI(u32 to, u32 ra, s32 simm16) { - InterpreterCall("TWI", &PPUInterpreter::TWI, to, ra, simm16); + CompilationError("TWI"); } void Compiler::MFVSCR(u32 vd) { @@ -1298,35 +1298,67 @@ void Compiler::VREFP(u32 vd, u32 vb) { } void Compiler::VRFIM(u32 vd, u32 vb) { - InterpreterCall("VRFIM", &PPUInterpreter::VRFIM, vd, vb); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::floor, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VRFIN(u32 vd, u32 vb) { - InterpreterCall("VRFIN", &PPUInterpreter::VRFIN, vd, vb); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::nearbyint, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VRFIP(u32 vd, u32 vb) { - InterpreterCall("VRFIP", &PPUInterpreter::VRFIP, vd, vb); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::ceil, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VRFIZ(u32 vd, u32 vb) { - InterpreterCall("VRFIZ", &PPUInterpreter::VRFIZ, vd, vb); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::trunc, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VRLB(u32 vd, u32 va, u32 vb) { - InterpreterCall("VRLB", &PPUInterpreter::VRLB, vd, va, vb); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(7))); + auto tmp1_v16i8 = m_ir_builder->CreateShl(va_v16i8, vb_v16i8); + vb_v16i8 = m_ir_builder->CreateSub(m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(8)), vb_v16i8); + auto tmp2_v16i8 = m_ir_builder->CreateLShr(va_v16i8, vb_v16i8); + auto res_v16i8 = m_ir_builder->CreateOr(tmp1_v16i8, tmp2_v16i8); + SetVr(vd, res_v16i8); } void Compiler::VRLH(u32 vd, u32 va, u32 vb) { - InterpreterCall("VRLH", &PPUInterpreter::VRLH, vd, va, vb); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF))); + auto tmp1_v8i16 = m_ir_builder->CreateShl(va_v8i16, vb_v8i16); + vb_v8i16 = m_ir_builder->CreateSub(m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0x10)), vb_v8i16); + auto tmp2_v8i16 = m_ir_builder->CreateLShr(va_v8i16, vb_v8i16); + auto res_v8i16 = m_ir_builder->CreateOr(tmp1_v8i16, tmp2_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VRLW(u32 vd, u32 va, u32 vb) { - InterpreterCall("VRLW", &PPUInterpreter::VRLW, vd, va, vb); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); + auto tmp1_v4i32 = m_ir_builder->CreateShl(va_v4i32, vb_v4i32); + vb_v4i32 = m_ir_builder->CreateSub(m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x20)), vb_v4i32); + auto tmp2_v4i32 = m_ir_builder->CreateLShr(va_v4i32, vb_v4i32); + auto res_v4i32 = m_ir_builder->CreateOr(tmp1_v4i32, tmp2_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VRSQRTEFP(u32 vd, u32 vb) { - InterpreterCall("VRSQRTEFP", &PPUInterpreter::VRSQRTEFP, vd, vb); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); + res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_rcp_ps), res_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VSEL(u32 vd, u32 va, u32 vb, u32 vc) { @@ -1832,7 +1864,8 @@ void Compiler::CRXOR(u32 crbd, u32 crba, u32 crbb) { } void Compiler::DCBI(u32 ra, u32 rb) { - InterpreterCall("DCBI", &PPUInterpreter::DCBI, ra, rb); + // TODO: See if this can be translated to cache flush + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); } void Compiler::CRNAND(u32 crbd, u32 crba, u32 crbb) { @@ -2125,7 +2158,7 @@ void Compiler::CMP(u32 crfd, u32 l, u32 ra, u32 rb) { } void Compiler::TW(u32 to, u32 ra, u32 rb) { - InterpreterCall("TW", &PPUInterpreter::TW, to, ra, rb); + CompilationError("TW"); } void Compiler::LVSL(u32 vd, u32 ra, u32 rb) { @@ -2506,7 +2539,7 @@ void Compiler::ANDC(u32 ra, u32 rs, u32 rb, bool rc) { } void Compiler::TD(u32 to, u32 ra, u32 rb) { - InterpreterCall("TD", &PPUInterpreter::TD, to, ra, rb); + CompilationError("TD"); } void Compiler::LVEWX(u32 vd, u32 ra, u32 rb) { @@ -2669,11 +2702,52 @@ void Compiler::STVEBX(u32 vs, u32 ra, u32 rb) { } void Compiler::SUBFE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { - InterpreterCall("SUBFE", &PPUInterpreter::SUBFE, rd, ra, rb, oe, rc); + auto ca_i64 = GetXerCa(); + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + ra_i64 = m_ir_builder->CreateNot(ra_i64); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); + auto res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, rb_i64); + res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); + SetGpr(rd, res_i64); + SetXerCa(carry_i1); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO: Implement this + } + //InterpreterCall("SUBFE", &PPUInterpreter::SUBFE, rd, ra, rb, oe, rc); } void Compiler::ADDE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { - InterpreterCall("ADDE", &PPUInterpreter::ADDE, rd, ra, rb, oe, rc); + auto ca_i64 = GetXerCa(); + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); + auto res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, rb_i64); + res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); + SetGpr(rd, res_i64); + SetXerCa(carry_i1); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO: Implement this + } + //InterpreterCall("ADDE", &PPUInterpreter::ADDE, rd, ra, rb, oe, rc); } void Compiler::MTOCRF(u32 l, u32 crm, u32 rs) { @@ -2793,7 +2867,19 @@ void Compiler::ADDZE(u32 rd, u32 ra, u32 oe, bool rc) { } void Compiler::SUBFZE(u32 rd, u32 ra, u32 oe, bool rc) { - InterpreterCall("SUBFZE", &PPUInterpreter::SUBFZE, rd, ra, oe, rc); + auto ra_i64 = GetGpr(ra); + ra_i64 = m_ir_builder->CreateNot(ra_i64); + auto ca_i64 = GetXerCa(); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); + auto res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + SetGpr(rd, res_i64); + SetXerCa(carry_i1); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } + //InterpreterCall("SUBFZE", &PPUInterpreter::SUBFZE, rd, ra, oe, rc); } void Compiler::STDCX_(u32 rs, u32 ra, u32 rb) { @@ -2824,7 +2910,27 @@ void Compiler::STVX(u32 vs, u32 ra, u32 rb) { } void Compiler::SUBFME(u32 rd, u32 ra, u32 oe, bool rc) { - InterpreterCall("SUBFME", &PPUInterpreter::SUBFME, rd, ra, oe, rc); + auto ca_i64 = GetXerCa(); + auto ra_i64 = GetGpr(ra); + ra_i64 = m_ir_builder->CreateNot(ra_i64); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); + auto res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, m_ir_builder->getInt64((s64)-1)); + res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); + SetGpr(rd, res_i64); + SetXerCa(carry_i1); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO: Implement this + } + //InterpreterCall("SUBFME", &PPUInterpreter::SUBFME, rd, ra, oe, rc); } void Compiler::MULLD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { @@ -2842,7 +2948,26 @@ void Compiler::MULLD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { } void Compiler::ADDME(u32 rd, u32 ra, u32 oe, bool rc) { - InterpreterCall("ADDME", &PPUInterpreter::ADDME, rd, ra, oe, rc); + auto ca_i64 = GetXerCa(); + auto ra_i64 = GetGpr(ra); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); + auto res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, m_ir_builder->getInt64((s64)-1)); + res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); + SetGpr(rd, res_i64); + SetXerCa(carry_i1); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO: Implement this + } + //InterpreterCall("ADDME", &PPUInterpreter::ADDME, rd, ra, oe, rc); } void Compiler::MULLW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { @@ -2913,7 +3038,17 @@ void Compiler::LHZX(u32 rd, u32 ra, u32 rb) { } void Compiler::EQV(u32 ra, u32 rs, u32 rb, bool rc) { - InterpreterCall("EQV", &PPUInterpreter::EQV, ra, rs, rb, rc); + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + auto res_i64 = m_ir_builder->CreateXor(rs_i64, rb_i64); + res_i64 = m_ir_builder->CreateNot(res_i64); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } + + //InterpreterCall("EQV", &PPUInterpreter::EQV, ra, rs, rb, rc); } void Compiler::ECIWX(u32 rd, u32 ra, u32 rb) { @@ -2991,7 +3126,8 @@ void Compiler::LWAX(u32 rd, u32 ra, u32 rb) { } void Compiler::DST(u32 ra, u32 rb, u32 strm, u32 t) { - InterpreterCall("DST", &PPUInterpreter::DST, ra, rb, strm, t); + // TODO: Revisit + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); } void Compiler::LHAX(u32 rd, u32 ra, u32 rb) { @@ -3036,7 +3172,8 @@ void Compiler::LWAUX(u32 rd, u32 ra, u32 rb) { } void Compiler::DSTST(u32 ra, u32 rb, u32 strm, u32 t) { - InterpreterCall("DSTST", &PPUInterpreter::DSTST, ra, rb, strm, t); + // TODO: Revisit + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); } void Compiler::LHAUX(u32 rd, u32 ra, u32 rb) { @@ -3063,7 +3200,16 @@ void Compiler::STHX(u32 rs, u32 ra, u32 rb) { } void Compiler::ORC(u32 ra, u32 rs, u32 rb, bool rc) { - InterpreterCall("ORC", &PPUInterpreter::ORC, ra, rs, rb, rc); + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + rb_i64 = m_ir_builder->CreateNot(rb_i64); + auto res_i64 = m_ir_builder->CreateOr(rs_i64, rb_i64); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } + //InterpreterCall("ORC", &PPUInterpreter::ORC, ra, rs, rb, rc); } void Compiler::ECOWX(u32 rs, u32 ra, u32 rb) { @@ -3149,7 +3295,16 @@ void Compiler::MTSPR(u32 spr, u32 rs) { } void Compiler::NAND(u32 ra, u32 rs, u32 rb, bool rc) { - InterpreterCall("NAND", &PPUInterpreter::NAND, ra, rs, rb, rc); + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + auto res_i64 = m_ir_builder->CreateAnd(rs_i64, rb_i64); + res_i64 = m_ir_builder->CreateNot(res_i64); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } + //InterpreterCall("NAND", &PPUInterpreter::NAND, ra, rs, rb, rc); } void Compiler::STVXL(u32 vs, u32 ra, u32 rb) { @@ -3537,7 +3692,8 @@ void Compiler::LVRXL(u32 vd, u32 ra, u32 rb) { } void Compiler::DSS(u32 strm, u32 a) { - InterpreterCall("DSS", &PPUInterpreter::DSS, strm, a); + // TODO: Revisit + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); } void Compiler::SRAWI(u32 ra, u32 rs, u32 sh, bool rc) { @@ -3661,7 +3817,8 @@ void Compiler::EXTSW(u32 ra, u32 rs, bool rc) { } void Compiler::ICBI(u32 ra, u32 rs) { - InterpreterCall("ICBI", &PPUInterpreter::ICBI, ra, rs); + // TODO: Revisit + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); } void Compiler::DCBZ(u32 ra, u32 rb) { @@ -4050,7 +4207,12 @@ void Compiler::FSQRTS(u32 frd, u32 frb, bool rc) { } void Compiler::FRES(u32 frd, u32 frb, bool rc) { - InterpreterCall("FRES", &PPUInterpreter::FRES, frd, frb, rc); + auto rb_f64 = GetFpr(frb); + auto res_f64 = m_ir_builder->CreateFDiv(ConstantFP::get(m_ir_builder->getDoubleTy(), 1.0), rb_f64); + SetFpr(frd, res_f64); + + // TODO: Set flags + //InterpreterCall("FRES", &PPUInterpreter::FRES, frd, frb, rc); } void Compiler::FMULS(u32 frd, u32 fra, u32 frc, bool rc) { @@ -4241,7 +4403,10 @@ void Compiler::FMUL(u32 frd, u32 fra, u32 frc, bool rc) { } void Compiler::FRSQRTE(u32 frd, u32 frb, bool rc) { - InterpreterCall("FRSQRTE", &PPUInterpreter::FRSQRTE, frd, frb, rc); + auto rb_f64 = GetFpr(frb); + auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64); + res_f64 = m_ir_builder->CreateFDiv(ConstantFP::get(m_ir_builder->getDoubleTy(), 1.0), res_f64); + SetFpr(frd, res_f64); } void Compiler::FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { @@ -4353,7 +4518,7 @@ void Compiler::FCFID(u32 frd, u32 frb, bool rc) { } void Compiler::UNK(const u32 code, const u32 opcode, const u32 gcode) { - //InterpreterCall("UNK", &PPUInterpreter::UNK, code, opcode, gcode); + CompilationError(fmt::Format("Unknown/Illegal opcode! (0x%08x : 0x%x : 0x%x)", code, opcode, gcode)); } std::string Compiler::GetBasicBlockNameFromAddress(u32 address, const std::string & suffix) const { @@ -5034,6 +5199,11 @@ llvm::Value * Compiler::IndirectCall(u32 address, Value * context_i64, bool is_f return m_ir_builder->CreateCall3(executable_ptr, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], context_i64); } +void Compiler::CompilationError(const std::string & error) { + LOG_ERROR(PPU, "[0x%08X] %s", m_state.current_instruction_address, error.c_str()); + Emu.Pause(); +} + void Compiler::InitRotateMask() { for (u32 mb = 0; mb < 64; mb++) { for (u32 me = 0; me < 64; me++) { diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h index fe1371f6d3..cc52a24858 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.h +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h @@ -930,6 +930,9 @@ namespace ppu_recompiler_llvm { /// Excute a test void RunTest(const char * name, std::function test_case, std::function input, std::function check_result); + /// Handle compilation errors + void CompilationError(const std::string & error); + /// A mask used in rotate instructions static u64 s_rotate_mask[64][64]; diff --git a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp index cc87581965..9acb6ddfc4 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp @@ -8,7 +8,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/MC/MCDisassembler.h" -#define PPU_LLVM_RECOMPILER_UNIT_TESTS 1 +//#define PPU_LLVM_RECOMPILER_UNIT_TESTS 1 using namespace llvm; using namespace ppu_recompiler_llvm; @@ -455,6 +455,14 @@ void Compiler::RunAllTests() { VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKUWUM, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKUWUS, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VREFP, 0, 5, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRFIM, 0, 5, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRFIN, 0, 5, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRFIP, 0, 5, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRFIZ, 0, 5, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRLB, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRLH, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRLW, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRSQRTEFP, 0, 5, 0, 1); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSEL, 0, 5, 0, 1, 2, 3); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSL, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSLB, 0, 5, 0, 1, 2); @@ -509,7 +517,7 @@ void Compiler::RunAllTests() { // TODO: BCLR VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRNOR, 0, 5, 0, 7, 3); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRANDC, 0, 5, 5, 6, 7); - // TODO: ISYNC + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ISYNC, 0, 5); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRXOR, 0, 5, 7, 7, 7); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRNAND, 0, 5, 3, 4, 5); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRAND, 0, 5, 1, 2, 3); @@ -539,100 +547,129 @@ void Compiler::RunAllTests() { VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDIMI, 5, 5, 21, 22, 23, 43, 1); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDC_LR, 0, 5, 7, 8, 9, 12, 0, 0); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDC_LR, 5, 5, 21, 22, 23, 43, 1, 1); - - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADD, 0, 5, 7, 8, 9, 0, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADD, 5, 5, 21, 22, 23, 0, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBF, 0, 5, 7, 8, 9, 0, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBF, 5, 5, 21, 22, 23, 0, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NEG, 0, 5, 7, 8, 0, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NEG, 5, 5, 21, 22, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMP, 0, 5, 3, 0, 9, 31); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMP, 5, 5, 6, 1, 23, 14); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFC, 0, 5, 0, 1, 2, 0, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFC, 5, 5, 0, 1, 2, 0, true); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDC, 0, 5, 0, 1, 2, 0, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDC, 5, 5, 0, 1, 2, 0, true); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHDU, 0, 5, 7, 8, 9, 0); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHDU, 5, 5, 21, 22, 23, 1); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHWU, 0, 5, 7, 8, 9, 0); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHWU, 5, 5, 21, 22, 23, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHD, 0, 5, 7, 8, 9, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHD, 5, 5, 21, 22, 23, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHW, 0, 5, 7, 8, 9, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHW, 5, 5, 21, 22, 23, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLD, 0, 5, 7, 8, 9, 0, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLD, 5, 5, 21, 22, 23, 0, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLW, 0, 5, 7, 8, 9, 0, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLW, 5, 5, 21, 22, 23, 0, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVD, 0, 5, 7, 8, 9, 0, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVD, 5, 5, 21, 22, 23, 0, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVDU, 0, 5, 7, 8, 9, 0, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVDU, 5, 5, 21, 22, 23, 0, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVW, 0, 5, 7, 8, 9, 0, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVW, 5, 5, 21, 22, 23, 0, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVWU, 0, 5, 7, 8, 9, 0, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVWU, 5, 5, 21, 22, 23, 0, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(AND, 0, 5, 7, 8, 9, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(AND, 5, 5, 21, 22, 23, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(OR, 0, 5, 7, 8, 9, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(OR, 5, 5, 21, 22, 23, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(XOR, 0, 5, 7, 8, 9, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(XOR, 5, 5, 21, 22, 23, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NOR, 0, 5, 7, 8, 9, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NOR, 5, 5, 21, 22, 23, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMP, 0, 5, 3, 0, 9, 31); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMP, 5, 5, 6, 1, 23, 14); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPL, 0, 5, 3, 0, 9, 31); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPL, 5, 5, 6, 1, 23, 14); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDC, 0, 5, 0, 1, 2, 0, false); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDC, 5, 5, 0, 1, 2, 0, true); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFC, 0, 5, 0, 1, 2, 0, false); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFC, 5, 5, 0, 1, 2, 0, true); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSB, 0, 5, 3, 5, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSB, 5, 5, 3, 5, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSH, 0, 5, 6, 9, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSH, 5, 5, 6, 9, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSW, 0, 5, 25, 29, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSW, 5, 5, 25, 29, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 0, 5, 0x20, 5); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 5, 5, 0x100, 5); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 10, 5, 0x120, 5); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 15, 5, 0x8, 5); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 0, 5, 5, 0x20); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 5, 5, 5, 0x100); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 10, 5, 5, 0x120); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 15, 5, 5, 0x8); - - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 0, 5, 5, 6, 0, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 5, 5, 5, 6, 12, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 10, 5, 5, 6, 22, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 15, 5, 5, 6, 31, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAW, 0, 5, 5, 6, 7, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAW, 5, 5, 5, 6, 7, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 0, 5, 5, 6, 0, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 5, 5, 5, 6, 12, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 10, 5, 5, 6, 48, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 15, 5, 5, 6, 63, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAD, 0, 5, 5, 6, 7, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAD, 5, 5, 5, 6, 7, 1); + // TODO: MFOCRF VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SLW, 0, 5, 5, 6, 7, 0); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SLW, 5, 5, 5, 6, 7, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRW, 0, 5, 5, 6, 7, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRW, 5, 5, 5, 6, 7, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SLD, 0, 5, 5, 6, 7, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SLD, 5, 5, 5, 6, 7, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRD, 0, 5, 5, 6, 7, 0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRD, 5, 5, 5, 6, 7, 1); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CNTLZW, 0, 5, 5, 6, 0); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CNTLZW, 5, 5, 5, 6, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SLD, 0, 5, 5, 6, 7, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SLD, 5, 5, 5, 6, 7, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(AND, 0, 5, 7, 8, 9, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(AND, 5, 5, 21, 22, 23, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPL, 0, 5, 3, 0, 9, 31); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPL, 5, 5, 6, 1, 23, 14); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBF, 0, 5, 7, 8, 9, 0, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBF, 5, 5, 21, 22, 23, 0, 1); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CNTLZD, 0, 5, 5, 6, 0); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CNTLZD, 5, 5, 5, 6, 1); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ANDC, 0, 5, 5, 6, 7, 0); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ANDC, 5, 5, 5, 6, 7, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ISYNC, 0, 5); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHD, 0, 5, 7, 8, 9, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHD, 5, 5, 21, 22, 23, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHW, 0, 5, 7, 8, 9, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHW, 5, 5, 21, 22, 23, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NEG, 0, 5, 7, 8, 0, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NEG, 5, 5, 21, 22, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NOR, 0, 5, 7, 8, 9, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NOR, 5, 5, 21, 22, 23, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFE, 0, 5, 7, 8, 9, 0, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFE, 5, 5, 21, 22, 23, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDE, 0, 5, 7, 8, 9, 0, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDE, 5, 5, 21, 22, 23, 0, 1); + // TODO: MTOCRF + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDZE, 0, 5, 7, 8, 0, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDZE, 5, 5, 21, 22, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFZE, 0, 5, 7, 8, 0, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFZE, 5, 5, 21, 22, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFME, 0, 5, 7, 8, 0, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFME, 5, 5, 21, 22, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLD, 0, 5, 7, 8, 9, 0, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLD, 5, 5, 21, 22, 23, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDME, 0, 5, 7, 8, 0, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDME, 5, 5, 21, 22, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLW, 0, 5, 7, 8, 9, 0, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLW, 5, 5, 21, 22, 23, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADD, 0, 5, 7, 8, 9, 0, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADD, 5, 5, 21, 22, 23, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EQV, 0, 5, 7, 8, 9, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EQV, 5, 5, 21, 22, 23, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(XOR, 0, 5, 7, 8, 9, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(XOR, 5, 5, 21, 22, 23, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 0, 5, 5, 0x20); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 5, 5, 5, 0x100); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 10, 5, 5, 0x120); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 15, 5, 5, 0x8); + // TODO: MFTB + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ORC, 0, 5, 7, 8, 9, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ORC, 5, 5, 21, 22, 23, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(OR, 0, 5, 7, 8, 9, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(OR, 5, 5, 21, 22, 23, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVDU, 0, 5, 7, 8, 9, 0, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVDU, 5, 5, 21, 22, 23, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVWU, 0, 5, 7, 8, 9, 0, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVWU, 5, 5, 21, 22, 23, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 0, 5, 0x20, 5); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 5, 5, 0x100, 5); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 10, 5, 0x120, 5); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 15, 5, 0x8, 5); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NAND, 0, 5, 7, 8, 9, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NAND, 5, 5, 21, 22, 23, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVD, 0, 5, 7, 8, 9, 0, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVD, 5, 5, 21, 22, 23, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVW, 0, 5, 7, 8, 9, 0, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVW, 5, 5, 21, 22, 23, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRW, 0, 5, 5, 6, 7, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRW, 5, 5, 5, 6, 7, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRD, 0, 5, 5, 6, 7, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRD, 5, 5, 5, 6, 7, 1); + // TODO: SYNC + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAW, 0, 5, 5, 6, 7, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAW, 5, 5, 5, 6, 7, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAD, 0, 5, 5, 6, 7, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAD, 5, 5, 5, 6, 7, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 0, 5, 5, 6, 0, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 5, 5, 5, 6, 12, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 10, 5, 5, 6, 22, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 15, 5, 5, 6, 31, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 0, 5, 5, 6, 0, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 5, 5, 5, 6, 12, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 10, 5, 5, 6, 48, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 15, 5, 5, 6, 63, 1); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EIEIO, 0, 5); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSH, 0, 5, 6, 9, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSH, 5, 5, 6, 9, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSB, 0, 5, 3, 5, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSB, 5, 5, 3, 5, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSW, 0, 5, 25, 29, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSW, 5, 5, 25, 29, 1); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSQRT, 0, 5, 0, 1, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FDIVS, 0, 5, 0, 1, 2, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSUBS, 0, 5, 0, 1, 2, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FADDS, 0, 5, 0, 1, 2, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSQRTS, 0, 5, 0, 1, false); - + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FRES, 0, 5, 0, 1, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMULS, 0, 5, 0, 1, 2, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMADDS, 0, 5, 0, 1, 2, 3, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMSUBS, 0, 5, 0, 1, 2, 3, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMSUBS, 0, 5, 0, 1, 2, 3, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMADDS, 0, 5, 0, 1, 2, 3, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCTIW, 0, 5, 0, 1, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FDIV, 0, 5, 0, 1, 2, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSUB, 0, 5, 0, 1, 2, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FADD, 0, 5, 0, 1, 2, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSQRT, 0, 5, 0, 1, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMUL, 0, 5, 0, 1, 2, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FRSQRTE, 0, 5, 0, 1, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMSUB, 0, 5, 0, 1, 2, 3, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMADD, 0, 5, 0, 1, 2, 3, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMSUB, 0, 5, 0, 1, 2, 3, false); @@ -641,9 +678,8 @@ void Compiler::RunAllTests() { VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMR, 0, 5, 0, 1, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNABS, 0, 5, 0, 1, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FABS, 0, 5, 0, 1, false); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCFID, 0, 5, 0, 1, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCTID, 0, 5, 0, 1, false); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCTIW, 0, 5, 0, 1, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCFID, 0, 5, 0, 1, false); PPUState input; input.SetRandom(0x10000); From bb1e08a6bef303dcae77ce387c0dc7612e857cfb Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Mon, 24 Nov 2014 23:49:06 +0530 Subject: [PATCH 08/13] Remove code left in comments from the PPU LLVM recompiler. Also cause the emulator to pause if unsupported instructions are required to be compiled. --- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 435 ++++++++++++--------------- 1 file changed, 185 insertions(+), 250 deletions(-) diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 3ed5a6b5ee..79f189fe03 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -1703,7 +1703,6 @@ void Compiler::MULLI(u32 rd, u32 ra, s32 simm16) { auto ra_i64 = GetGpr(ra); auto res_i64 = m_ir_builder->CreateMul(ra_i64, m_ir_builder->getInt64((s64)simm16)); SetGpr(rd, res_i64); - //InterpreterCall("MULLI", &PPUInterpreter::MULLI, rd, ra, simm16); } void Compiler::SUBFIC(u32 rd, u32 ra, s32 simm16) { @@ -1714,7 +1713,6 @@ void Compiler::SUBFIC(u32 rd, u32 ra, s32 simm16) { auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); SetGpr(rd, diff_i64); SetXerCa(carry_i1); - //InterpreterCall("SUBFIC", &PPUInterpreter::SUBFIC, rd, ra, simm16); } void Compiler::CMPLI(u32 crfd, u32 l, u32 ra, u32 uimm16) { @@ -1726,7 +1724,6 @@ void Compiler::CMPLI(u32 crfd, u32 l, u32 ra, u32 uimm16) { } SetCrFieldUnsignedCmp(crfd, ra_i64, m_ir_builder->getInt64(uimm16)); - //InterpreterCall("CMPLI", &PPUInterpreter::CMPLI, crfd, l, ra, uimm16); } void Compiler::CMPI(u32 crfd, u32 l, u32 ra, s32 simm16) { @@ -1738,7 +1735,6 @@ void Compiler::CMPI(u32 crfd, u32 l, u32 ra, s32 simm16) { } SetCrFieldSignedCmp(crfd, ra_i64, m_ir_builder->getInt64((s64)simm16)); - //InterpreterCall("CMPI", &PPUInterpreter::CMPI, crfd, l, ra, simm16); } void Compiler::ADDIC(u32 rd, u32 ra, s32 simm16) { @@ -1748,13 +1744,11 @@ void Compiler::ADDIC(u32 rd, u32 ra, s32 simm16) { auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); SetGpr(rd, sum_i64); SetXerCa(carry_i1); - //InterpreterCall("ADDIC", &PPUInterpreter::ADDIC, rd, ra, simm16); } void Compiler::ADDIC_(u32 rd, u32 ra, s32 simm16) { ADDIC(rd, ra, simm16); SetCrFieldSignedCmp(0, GetGpr(rd), m_ir_builder->getInt64(0)); - //InterpreterCall("ADDIC_", &PPUInterpreter::ADDIC_, rd, ra, simm16); } void Compiler::ADDI(u32 rd, u32 ra, s32 simm16) { @@ -1765,7 +1759,6 @@ void Compiler::ADDI(u32 rd, u32 ra, s32 simm16) { auto sum_i64 = m_ir_builder->CreateAdd(ra_i64, m_ir_builder->getInt64((s64)simm16)); SetGpr(rd, sum_i64); } - //InterpreterCall("ADDI", &PPUInterpreter::ADDI, rd, ra, simm16); } void Compiler::ADDIS(u32 rd, u32 ra, s32 simm16) { @@ -1776,18 +1769,12 @@ void Compiler::ADDIS(u32 rd, u32 ra, s32 simm16) { auto sum_i64 = m_ir_builder->CreateAdd(ra_i64, m_ir_builder->getInt64((s64)simm16 << 16)); SetGpr(rd, sum_i64); } - //InterpreterCall("ADDIS", &PPUInterpreter::ADDIS, rd, ra, simm16); } void Compiler::BC(u32 bo, u32 bi, s32 bd, u32 aa, u32 lk) { auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_state.current_instruction_address, bd)); auto target_i32 = m_ir_builder->CreateTrunc(target_i64, m_ir_builder->getInt32Ty()); CreateBranch(CheckBranchCondition(bo, bi), target_i32, lk ? true : false); - //m_hit_branch_instruction = true; - //SetPc(m_ir_builder->getInt32(m_current_instruction_address)); - //InterpreterCall("BC", &PPUInterpreter::BC, bo, bi, bd, aa, lk); - //SetPc(m_ir_builder->getInt32(m_current_instruction_address + 4)); - //m_ir_builder->CreateRetVoid(); } void Compiler::SC(u32 sc_code) { @@ -1798,10 +1785,6 @@ void Compiler::B(s32 ll, u32 aa, u32 lk) { auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_state.current_instruction_address, ll)); auto target_i32 = m_ir_builder->CreateTrunc(target_i64, m_ir_builder->getInt32Ty()); CreateBranch(nullptr, target_i32, lk ? true : false); - //m_hit_branch_instruction = true; - //SetPc(m_ir_builder->getInt32(m_current_instruction_address)); - //InterpreterCall("B", &PPUInterpreter::B, ll, aa, lk); - //m_ir_builder->CreateRetVoid(); } void Compiler::MCRF(u32 crfd, u32 crfs) { @@ -1811,7 +1794,6 @@ void Compiler::MCRF(u32 crfd, u32 crfs) { cr_i32 = SetNibble(cr_i32, crfd, crf_i32); SetCr(cr_i32); } - //InterpreterCall("MCRF", &PPUInterpreter::MCRF, crfd, crfs); } void Compiler::BCLR(u32 bo, u32 bi, u32 bh, u32 lk) { @@ -1819,11 +1801,6 @@ void Compiler::BCLR(u32 bo, u32 bi, u32 bh, u32 lk) { lr_i64 = m_ir_builder->CreateAnd(lr_i64, ~0x3ULL); auto lr_i32 = m_ir_builder->CreateTrunc(lr_i64, m_ir_builder->getInt32Ty()); CreateBranch(CheckBranchCondition(bo, bi), lr_i32, lk ? true : false, true); - //m_hit_branch_instruction = true; - //SetPc(m_ir_builder->getInt32(m_current_instruction_address)); - //InterpreterCall("BCLR", &PPUInterpreter::BCLR, bo, bi, bh, lk); - //SetPc(m_ir_builder->getInt32(m_current_instruction_address + 4)); - //m_ir_builder->CreateRetVoid(); } void Compiler::CRNOR(u32 crbd, u32 crba, u32 crbb) { @@ -1834,7 +1811,6 @@ void Compiler::CRNOR(u32 crbd, u32 crba, u32 crbb) { res_i32 = m_ir_builder->CreateXor(res_i32, 1); cr_i32 = SetBit(cr_i32, crbd, res_i32); SetCr(cr_i32); - //InterpreterCall("CRNOR", &PPUInterpreter::CRNOR, crbd, crba, crbb); } void Compiler::CRANDC(u32 crbd, u32 crba, u32 crbb) { @@ -1845,12 +1821,10 @@ void Compiler::CRANDC(u32 crbd, u32 crba, u32 crbb) { res_i32 = m_ir_builder->CreateAnd(ba_i32, res_i32); cr_i32 = SetBit(cr_i32, crbd, res_i32); SetCr(cr_i32); - //InterpreterCall("CRANDC", &PPUInterpreter::CRANDC, crbd, crba, crbb); } void Compiler::ISYNC() { m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence)); - //InterpreterCall("ISYNC", &PPUInterpreter::ISYNC); } void Compiler::CRXOR(u32 crbd, u32 crba, u32 crbb) { @@ -1860,7 +1834,6 @@ void Compiler::CRXOR(u32 crbd, u32 crba, u32 crbb) { auto res_i32 = m_ir_builder->CreateXor(ba_i32, bb_i32); cr_i32 = SetBit(cr_i32, crbd, res_i32); SetCr(cr_i32); - //InterpreterCall("CRXOR", &PPUInterpreter::CRXOR, crbd, crba, crbb); } void Compiler::DCBI(u32 ra, u32 rb) { @@ -1876,7 +1849,6 @@ void Compiler::CRNAND(u32 crbd, u32 crba, u32 crbb) { res_i32 = m_ir_builder->CreateXor(res_i32, 1); cr_i32 = SetBit(cr_i32, crbd, res_i32); SetCr(cr_i32); - //InterpreterCall("CRNAND", &PPUInterpreter::CRNAND, crbd, crba, crbb); } void Compiler::CRAND(u32 crbd, u32 crba, u32 crbb) { @@ -1886,7 +1858,6 @@ void Compiler::CRAND(u32 crbd, u32 crba, u32 crbb) { auto res_i32 = m_ir_builder->CreateAnd(ba_i32, bb_i32); cr_i32 = SetBit(cr_i32, crbd, res_i32); SetCr(cr_i32); - //InterpreterCall("CRAND", &PPUInterpreter::CRAND, crbd, crba, crbb); } void Compiler::CREQV(u32 crbd, u32 crba, u32 crbb) { @@ -1897,7 +1868,6 @@ void Compiler::CREQV(u32 crbd, u32 crba, u32 crbb) { res_i32 = m_ir_builder->CreateXor(res_i32, 1); cr_i32 = SetBit(cr_i32, crbd, res_i32); SetCr(cr_i32); - //InterpreterCall("CREQV", &PPUInterpreter::CREQV, crbd, crba, crbb); } void Compiler::CRORC(u32 crbd, u32 crba, u32 crbb) { @@ -1908,7 +1878,6 @@ void Compiler::CRORC(u32 crbd, u32 crba, u32 crbb) { res_i32 = m_ir_builder->CreateOr(ba_i32, res_i32); cr_i32 = SetBit(cr_i32, crbd, res_i32); SetCr(cr_i32); - //InterpreterCall("CRORC", &PPUInterpreter::CRORC, crbd, crba, crbb); } void Compiler::CROR(u32 crbd, u32 crba, u32 crbb) { @@ -1918,7 +1887,6 @@ void Compiler::CROR(u32 crbd, u32 crba, u32 crbb) { auto res_i32 = m_ir_builder->CreateOr(ba_i32, bb_i32); cr_i32 = SetBit(cr_i32, crbd, res_i32); SetCr(cr_i32); - //InterpreterCall("CROR", &PPUInterpreter::CROR, crbd, crba, crbb); } void Compiler::BCCTR(u32 bo, u32 bi, u32 bh, u32 lk) { @@ -1926,11 +1894,6 @@ void Compiler::BCCTR(u32 bo, u32 bi, u32 bh, u32 lk) { ctr_i64 = m_ir_builder->CreateAnd(ctr_i64, ~0x3ULL); auto ctr_i32 = m_ir_builder->CreateTrunc(ctr_i64, m_ir_builder->getInt32Ty()); CreateBranch(CheckBranchCondition(bo, bi), ctr_i32, lk ? true : false); - //m_hit_branch_instruction = true; - //SetPc(m_ir_builder->getInt32(m_current_instruction_address)); - //InterpreterCall("BCCTR", &PPUInterpreter::BCCTR, bo, bi, bh, lk); - //SetPc(m_ir_builder->getInt32(m_current_instruction_address + 4)); - //m_ir_builder->CreateRetVoid(); } void Compiler::RLWIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) { @@ -1955,7 +1918,6 @@ void Compiler::RLWIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("RLWIMI", &PPUInterpreter::RLWIMI, ra, rs, sh, mb, me, rc); } void Compiler::RLWINM(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) { @@ -1976,7 +1938,6 @@ void Compiler::RLWINM(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("RLWINM", &PPUInterpreter::RLWINM, ra, rs, sh, mb, me, rc); } void Compiler::RLWNM(u32 ra, u32 rs, u32 rb, u32 mb, u32 me, bool rc) { @@ -1996,35 +1957,30 @@ void Compiler::RLWNM(u32 ra, u32 rs, u32 rb, u32 mb, u32 me, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("RLWNM", &PPUInterpreter::RLWNM, ra, rs, rb, mb, me, rc); } void Compiler::ORI(u32 ra, u32 rs, u32 uimm16) { auto rs_i64 = GetGpr(rs); auto res_i64 = m_ir_builder->CreateOr(rs_i64, uimm16); SetGpr(ra, res_i64); - //InterpreterCall("ORI", &PPUInterpreter::ORI, ra, rs, uimm16); } void Compiler::ORIS(u32 ra, u32 rs, u32 uimm16) { auto rs_i64 = GetGpr(rs); auto res_i64 = m_ir_builder->CreateOr(rs_i64, (u64)uimm16 << 16); SetGpr(ra, res_i64); - //InterpreterCall("ORIS", &PPUInterpreter::ORIS, ra, rs, uimm16); } void Compiler::XORI(u32 ra, u32 rs, u32 uimm16) { auto rs_i64 = GetGpr(rs); auto res_i64 = m_ir_builder->CreateXor(rs_i64, uimm16); SetGpr(ra, res_i64); - //InterpreterCall("XORI", &PPUInterpreter::XORI, ra, rs, uimm16); } void Compiler::XORIS(u32 ra, u32 rs, u32 uimm16) { auto rs_i64 = GetGpr(rs); auto res_i64 = m_ir_builder->CreateXor(rs_i64, (u64)uimm16 << 16); SetGpr(ra, res_i64); - //InterpreterCall("XORIS", &PPUInterpreter::XORIS, ra, rs, uimm16); } void Compiler::ANDI_(u32 ra, u32 rs, u32 uimm16) { @@ -2032,7 +1988,6 @@ void Compiler::ANDI_(u32 ra, u32 rs, u32 uimm16) { auto res_i64 = m_ir_builder->CreateAnd(rs_i64, uimm16); SetGpr(ra, res_i64); SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - //InterpreterCall("ANDI_", &PPUInterpreter::ANDI_, ra, rs, uimm16); } void Compiler::ANDIS_(u32 ra, u32 rs, u32 uimm16) { @@ -2040,7 +1995,6 @@ void Compiler::ANDIS_(u32 ra, u32 rs, u32 uimm16) { auto res_i64 = m_ir_builder->CreateAnd(rs_i64, (u64)uimm16 << 16); SetGpr(ra, res_i64); SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - //InterpreterCall("ANDIS_", &PPUInterpreter::ANDIS_, ra, rs, uimm16); } void Compiler::RLDICL(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { @@ -2058,7 +2012,6 @@ void Compiler::RLDICL(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("RLDICL", &PPUInterpreter::RLDICL, ra, rs, sh, mb, rc); } void Compiler::RLDICR(u32 ra, u32 rs, u32 sh, u32 me, bool rc) { @@ -2076,7 +2029,6 @@ void Compiler::RLDICR(u32 ra, u32 rs, u32 sh, u32 me, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("RLDICR", &PPUInterpreter::RLDICR, ra, rs, sh, me, rc); } void Compiler::RLDIC(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { @@ -2094,7 +2046,6 @@ void Compiler::RLDIC(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("RLDIC", &PPUInterpreter::RLDIC, ra, rs, sh, mb, rc); } void Compiler::RLDIMI(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { @@ -2116,7 +2067,6 @@ void Compiler::RLDIMI(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("RLDIMI", &PPUInterpreter::RLDIMI, ra, rs, sh, mb, rc); } void Compiler::RLDC_LR(u32 ra, u32 rs, u32 rb, u32 m_eb, bool is_r, bool rc) { @@ -2139,7 +2089,6 @@ void Compiler::RLDC_LR(u32 ra, u32 rs, u32 rb, u32 m_eb, bool is_r, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("RLDC_LR", &PPUInterpreter::RLDC_LR, ra, rs, rb, m_eb, is_r, rc); } void Compiler::CMP(u32 crfd, u32 l, u32 ra, u32 rb) { @@ -2154,7 +2103,6 @@ void Compiler::CMP(u32 crfd, u32 l, u32 ra, u32 rb) { } SetCrFieldSignedCmp(crfd, ra_i64, rb_i64); - //InterpreterCall("CMP", &PPUInterpreter::CMP, crfd, l, ra, rb); } void Compiler::TW(u32 to, u32 ra, u32 rb) { @@ -2192,8 +2140,6 @@ void Compiler::LVSL(u32 vd, u32 ra, u32 rb) { lvsl_values_v16i8_ptr = m_ir_builder->CreateGEP(lvsl_values_v16i8_ptr, index_i64); auto val_v16i8 = m_ir_builder->CreateAlignedLoad(lvsl_values_v16i8_ptr, 16); SetVr(vd, val_v16i8); - - //InterpreterCall("LVSL", &PPUInterpreter::LVSL, vd, ra, rb); } void Compiler::LVEBX(u32 vd, u32 ra, u32 rb) { @@ -2209,8 +2155,6 @@ void Compiler::LVEBX(u32 vd, u32 ra, u32 rb) { auto vd_v16i8 = GetVrAsIntVec(vd, 8); vd_v16i8 = m_ir_builder->CreateInsertElement(vd_v16i8, val_i8, index_i64); SetVr(vd, vd_v16i8); - - //InterpreterCall("LVEBX", &PPUInterpreter::LVEBX, vd, ra, rb); } void Compiler::SUBFC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { @@ -2229,8 +2173,8 @@ void Compiler::SUBFC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { if (oe) { // TODO: Implement this + CompilationError("SUBFCO"); } - //InterpreterCall("SUBFC", &PPUInterpreter::SUBFC, rd, ra, rb, oe, rc); } void Compiler::ADDC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { @@ -2249,7 +2193,6 @@ void Compiler::ADDC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { if (oe) { // TODO: Implement this } - //InterpreterCall("ADDC", &PPUInterpreter::ADDC, rd, ra, rb, oe, rc); } void Compiler::MULHDU(u32 rd, u32 ra, u32 rb, bool rc) { @@ -2265,8 +2208,6 @@ void Compiler::MULHDU(u32 rd, u32 ra, u32 rb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); } - - //InterpreterCall("MULHDU", &PPUInterpreter::MULHDU, rd, ra, rb, rc); } void Compiler::MULHWU(u32 rd, u32 ra, u32 rb, bool rc) { @@ -2281,14 +2222,12 @@ void Compiler::MULHWU(u32 rd, u32 ra, u32 rb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("MULHWU", &PPUInterpreter::MULHWU, rd, ra, rb, rc); } void Compiler::MFOCRF(u32 a, u32 rd, u32 crm) { auto cr_i32 = GetCr(); auto cr_i64 = m_ir_builder->CreateZExt(cr_i32, m_ir_builder->getInt64Ty()); SetGpr(rd, cr_i64); - //InterpreterCall("MFOCRF", &PPUInterpreter::MFOCRF, a, rd, crm); } void Compiler::LWARX(u32 rd, u32 ra, u32 rb) { @@ -2311,7 +2250,6 @@ void Compiler::LWARX(u32 rd, u32 ra, u32 rb) { resv_val_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt32Ty()), resv_val_i32); resv_val_i64 = m_ir_builder->CreateZExt(resv_val_i32, m_ir_builder->getInt64Ty()); SetGpr(rd, resv_val_i64); - //InterpreterCall("LWARX", &PPUInterpreter::LWARX, rd, ra, rb); } void Compiler::LDX(u32 rd, u32 ra, u32 rb) { @@ -2323,7 +2261,6 @@ void Compiler::LDX(u32 rd, u32 ra, u32 rb) { auto mem_i64 = ReadMemory(addr_i64, 64); SetGpr(rd, mem_i64); - //InterpreterCall("LDX", &PPUInterpreter::LDX, rd, ra, rb); } void Compiler::LWZX(u32 rd, u32 ra, u32 rb) { @@ -2336,7 +2273,6 @@ void Compiler::LWZX(u32 rd, u32 ra, u32 rb) { auto mem_i32 = ReadMemory(addr_i64, 32); auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); - //InterpreterCall("LWZX", &PPUInterpreter::LWZX, rd, ra, rb); } void Compiler::SLW(u32 ra, u32 rs, u32 rb, bool rc) { @@ -2353,8 +2289,6 @@ void Compiler::SLW(u32 ra, u32 rs, u32 rb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - - //InterpreterCall("SLW", &PPUInterpreter::SLW, ra, rs, rb, rc); } void Compiler::CNTLZW(u32 ra, u32 rs, bool rc) { @@ -2366,8 +2300,6 @@ void Compiler::CNTLZW(u32 ra, u32 rs, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - - //InterpreterCall("CNTLZW", &PPUInterpreter::CNTLZW, ra, rs, rc); } void Compiler::SLD(u32 ra, u32 rs, u32 rb, bool rc) { @@ -2383,8 +2315,6 @@ void Compiler::SLD(u32 ra, u32 rs, u32 rb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - - //InterpreterCall("SLD", &PPUInterpreter::SLD, ra, rs, rb, rc); } void Compiler::AND(u32 ra, u32 rs, u32 rb, bool rc) { @@ -2396,7 +2326,6 @@ void Compiler::AND(u32 ra, u32 rs, u32 rb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("AND", &PPUInterpreter::AND, ra, rs, rb, rc); } void Compiler::CMPL(u32 crfd, u32 l, u32 ra, u32 rb) { @@ -2411,7 +2340,6 @@ void Compiler::CMPL(u32 crfd, u32 l, u32 ra, u32 rb) { } SetCrFieldUnsignedCmp(crfd, ra_i64, rb_i64); - //InterpreterCall("CMPL", &PPUInterpreter::CMPL, crfd, l, ra, rb); } void Compiler::LVSR(u32 vd, u32 ra, u32 rb) { @@ -2445,8 +2373,6 @@ void Compiler::LVSR(u32 vd, u32 ra, u32 rb) { lvsr_values_v16i8_ptr = m_ir_builder->CreateGEP(lvsr_values_v16i8_ptr, index_i64); auto val_v16i8 = m_ir_builder->CreateAlignedLoad(lvsr_values_v16i8_ptr, 16); SetVr(vd, val_v16i8); - - //InterpreterCall("LVSR", &PPUInterpreter::LVSR, vd, ra, rb); } void Compiler::LVEHX(u32 vd, u32 ra, u32 rb) { @@ -2464,8 +2390,6 @@ void Compiler::LVEHX(u32 vd, u32 ra, u32 rb) { auto vd_v8i16 = GetVrAsIntVec(vd, 16); vd_v8i16 = m_ir_builder->CreateInsertElement(vd_v8i16, val_i16, index_i64); SetVr(vd, vd_v8i16); - - //InterpreterCall("LVEHX", &PPUInterpreter::LVEHX, vd, ra, rb); } void Compiler::SUBF(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { @@ -2480,8 +2404,8 @@ void Compiler::SUBF(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { if (oe) { // TODO: Implement this + CompilationError("SUBFO"); } - //InterpreterCall("SUBF", &PPUInterpreter::SUBF, rd, ra, rb, oe, rc); } void Compiler::LDUX(u32 rd, u32 ra, u32 rb) { @@ -2492,13 +2416,11 @@ void Compiler::LDUX(u32 rd, u32 ra, u32 rb) { auto mem_i64 = ReadMemory(addr_i64, 64); SetGpr(rd, mem_i64); SetGpr(ra, addr_i64); - //InterpreterCall("LDUX", &PPUInterpreter::LDUX, rd, ra, rb); } void Compiler::DCBST(u32 ra, u32 rb) { // TODO: Implement this m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); - //InterpreterCall("DCBST", &PPUInterpreter::DCBST, ra, rb); } void Compiler::LWZUX(u32 rd, u32 ra, u32 rb) { @@ -2510,7 +2432,6 @@ void Compiler::LWZUX(u32 rd, u32 ra, u32 rb) { auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); SetGpr(ra, addr_i64); - //InterpreterCall("LWZUX", &PPUInterpreter::LWZUX, rd, ra, rb); } void Compiler::CNTLZD(u32 ra, u32 rs, bool rc) { @@ -2521,8 +2442,6 @@ void Compiler::CNTLZD(u32 ra, u32 rs, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - - //InterpreterCall("CNTLZD", &PPUInterpreter::CNTLZD, ra, rs, rc); } void Compiler::ANDC(u32 ra, u32 rs, u32 rb, bool rc) { @@ -2535,7 +2454,6 @@ void Compiler::ANDC(u32 ra, u32 rs, u32 rb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("ANDC", &PPUInterpreter::ANDC, ra, rs, rb, rc); } void Compiler::TD(u32 to, u32 ra, u32 rb) { @@ -2557,8 +2475,6 @@ void Compiler::LVEWX(u32 vd, u32 ra, u32 rb) { auto vd_v4i32 = GetVrAsIntVec(vd, 32); vd_v4i32 = m_ir_builder->CreateInsertElement(vd_v4i32, val_i32, index_i64); SetVr(vd, vd_v4i32); - - //InterpreterCall("LVEWX", &PPUInterpreter::LVEWX, vd, ra, rb); } void Compiler::MULHD(u32 rd, u32 ra, u32 rb, bool rc) { @@ -2574,8 +2490,6 @@ void Compiler::MULHD(u32 rd, u32 ra, u32 rb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); } - - //InterpreterCall("MULHD", &PPUInterpreter::MULHD, rd, ra, rb, rc); } void Compiler::MULHW(u32 rd, u32 ra, u32 rb, bool rc) { @@ -2590,7 +2504,6 @@ void Compiler::MULHW(u32 rd, u32 ra, u32 rb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("MULHW", &PPUInterpreter::MULHW, rd, ra, rb, rc); } void Compiler::LDARX(u32 rd, u32 ra, u32 rb) { @@ -2611,13 +2524,11 @@ void Compiler::LDARX(u32 rd, u32 ra, u32 rb) { resv_val_i64 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt64Ty()), resv_val_i64); SetGpr(rd, resv_val_i64); - //InterpreterCall("LDARX", &PPUInterpreter::LDARX, rd, ra, rb); } void Compiler::DCBF(u32 ra, u32 rb) { // TODO: Implement this m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); - //InterpreterCall("DCBF", &PPUInterpreter::DCBF, ra, rb); } void Compiler::LBZX(u32 rd, u32 ra, u32 rb) { @@ -2630,7 +2541,6 @@ void Compiler::LBZX(u32 rd, u32 ra, u32 rb) { auto mem_i8 = ReadMemory(addr_i64, 8); auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); - //InterpreterCall("LBZX", &PPUInterpreter::LBZX, rd, ra, rb); } void Compiler::LVX(u32 vd, u32 ra, u32 rb) { @@ -2643,7 +2553,6 @@ void Compiler::LVX(u32 vd, u32 ra, u32 rb) { addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL); auto mem_i128 = ReadMemory(addr_i64, 128, 16); SetVr(vd, mem_i128); - //InterpreterCall("LVX", &PPUInterpreter::LVX, vd, ra, rb); } void Compiler::NEG(u32 rd, u32 ra, u32 oe, bool rc) { @@ -2657,8 +2566,8 @@ void Compiler::NEG(u32 rd, u32 ra, u32 oe, bool rc) { if (oe) { // TODO: Implement this + CompilationError("NEGO"); } - //InterpreterCall("NEG", &PPUInterpreter::NEG, rd, ra, oe, rc); } void Compiler::LBZUX(u32 rd, u32 ra, u32 rb) { @@ -2670,7 +2579,6 @@ void Compiler::LBZUX(u32 rd, u32 ra, u32 rb) { auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); SetGpr(ra, addr_i64); - //InterpreterCall("LBZUX", &PPUInterpreter::LBZUX, rd, ra, rb); } void Compiler::NOR(u32 ra, u32 rs, u32 rb, bool rc) { @@ -2683,7 +2591,6 @@ void Compiler::NOR(u32 ra, u32 rs, u32 rb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("NOR", &PPUInterpreter::NOR, ra, rs, rb, rc); } void Compiler::STVEBX(u32 vs, u32 ra, u32 rb) { @@ -2698,7 +2605,6 @@ void Compiler::STVEBX(u32 vs, u32 ra, u32 rb) { auto vs_v16i8 = GetVrAsIntVec(vs, 8); auto val_i8 = m_ir_builder->CreateExtractElement(vs_v16i8, index_i64); WriteMemory(addr_i64, val_i8); - //InterpreterCall("STVEBX", &PPUInterpreter::STVEBX, vs, ra, rb); } void Compiler::SUBFE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { @@ -2722,8 +2628,8 @@ void Compiler::SUBFE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { if (oe) { // TODO: Implement this + CompilationError("SUBFEO"); } - //InterpreterCall("SUBFE", &PPUInterpreter::SUBFE, rd, ra, rb, oe, rc); } void Compiler::ADDE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { @@ -2746,8 +2652,8 @@ void Compiler::ADDE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { if (oe) { // TODO: Implement this + CompilationError("ADDEO"); } - //InterpreterCall("ADDE", &PPUInterpreter::ADDE, rd, ra, rb, oe, rc); } void Compiler::MTOCRF(u32 l, u32 crm, u32 rs) { @@ -2768,7 +2674,6 @@ void Compiler::MTOCRF(u32 l, u32 crm, u32 rs) { rs_i32 = m_ir_builder->CreateAnd(rs_i32, ~mask); cr_i32 = m_ir_builder->CreateOr(cr_i32, rs_i32); SetCr(cr_i32); - //InterpreterCall("MTOCRF", &PPUInterpreter::MTOCRF, l, crm, rs); } void Compiler::STDX(u32 rs, u32 ra, u32 rb) { @@ -2779,7 +2684,6 @@ void Compiler::STDX(u32 rs, u32 ra, u32 rb) { } WriteMemory(addr_i64, GetGpr(rs, 64)); - //InterpreterCall("STDX", &PPUInterpreter::STDX, rs, ra, rb); } void Compiler::STWCX_(u32 rs, u32 ra, u32 rb) { @@ -2794,7 +2698,6 @@ void Compiler::STWX(u32 rs, u32 ra, u32 rb) { } WriteMemory(addr_i64, GetGpr(rs, 32)); - //InterpreterCall("STWX", &PPUInterpreter::STWX, rs, ra, rb); } void Compiler::STVEHX(u32 vs, u32 ra, u32 rb) { @@ -2811,7 +2714,6 @@ void Compiler::STVEHX(u32 vs, u32 ra, u32 rb) { auto vs_v8i16 = GetVrAsIntVec(vs, 16); auto val_i16 = m_ir_builder->CreateExtractElement(vs_v8i16, index_i64); WriteMemory(addr_i64, val_i16, 2); - //InterpreterCall("STVEHX", &PPUInterpreter::STVEHX, vs, ra, rb); } void Compiler::STDUX(u32 rs, u32 ra, u32 rb) { @@ -2821,7 +2723,6 @@ void Compiler::STDUX(u32 rs, u32 ra, u32 rb) { WriteMemory(addr_i64, GetGpr(rs, 64)); SetGpr(ra, addr_i64); - //InterpreterCall("STDUX", &PPUInterpreter::STDUX, rs, ra, rb); } void Compiler::STWUX(u32 rs, u32 ra, u32 rb) { @@ -2831,7 +2732,6 @@ void Compiler::STWUX(u32 rs, u32 ra, u32 rb) { WriteMemory(addr_i64, GetGpr(rs, 32)); SetGpr(ra, addr_i64); - //InterpreterCall("STWUX", &PPUInterpreter::STWUX, rs, ra, rb); } void Compiler::STVEWX(u32 vs, u32 ra, u32 rb) { @@ -2848,7 +2748,6 @@ void Compiler::STVEWX(u32 vs, u32 ra, u32 rb) { auto vs_v4i32 = GetVrAsIntVec(vs, 32); auto val_i32 = m_ir_builder->CreateExtractElement(vs_v4i32, index_i64); WriteMemory(addr_i64, val_i32, 4); - //InterpreterCall("STVEWX", &PPUInterpreter::STVEWX, vs, ra, rb); } void Compiler::ADDZE(u32 rd, u32 ra, u32 oe, bool rc) { @@ -2859,11 +2758,15 @@ void Compiler::ADDZE(u32 rd, u32 ra, u32 oe, bool rc) { auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); SetGpr(rd, sum_i64); SetXerCa(carry_i1); - + if (rc) { SetCrFieldSignedCmp(0, sum_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("ADDZE", &PPUInterpreter::ADDZE, rd, ra, oe, rc); + + if (oe) { + // TODO: Implement this + CompilationError("ADDZEO"); + } } void Compiler::SUBFZE(u32 rd, u32 ra, u32 oe, bool rc) { @@ -2875,11 +2778,15 @@ void Compiler::SUBFZE(u32 rd, u32 ra, u32 oe, bool rc) { auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); SetGpr(rd, res_i64); SetXerCa(carry_i1); - + if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("SUBFZE", &PPUInterpreter::SUBFZE, rd, ra, oe, rc); + + if (oe) { + // TODO: Implement this + CompilationError("SUBFZEO"); + } } void Compiler::STDCX_(u32 rs, u32 ra, u32 rb) { @@ -2894,7 +2801,6 @@ void Compiler::STBX(u32 rs, u32 ra, u32 rb) { } WriteMemory(addr_i64, GetGpr(rs, 8)); - //InterpreterCall("STBX", &PPUInterpreter::STBX, rs, ra, rb); } void Compiler::STVX(u32 vs, u32 ra, u32 rb) { @@ -2906,7 +2812,6 @@ void Compiler::STVX(u32 vs, u32 ra, u32 rb) { addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL); WriteMemory(addr_i64, GetVr(vs), 16); - //InterpreterCall("STVX", &PPUInterpreter::STVX, vs, ra, rb); } void Compiler::SUBFME(u32 rd, u32 ra, u32 oe, bool rc) { @@ -2929,8 +2834,8 @@ void Compiler::SUBFME(u32 rd, u32 ra, u32 oe, bool rc) { if (oe) { // TODO: Implement this + CompilationError("SUBFMEO"); } - //InterpreterCall("SUBFME", &PPUInterpreter::SUBFME, rd, ra, oe, rc); } void Compiler::MULLD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { @@ -2943,8 +2848,10 @@ void Compiler::MULLD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); } - // TODO implement oe - //InterpreterCall("MULLD", &PPUInterpreter::MULLD, rd, ra, rb, oe, rc); + if (oe) { + // TODO implement oe + CompilationError("MULLDO"); + } } void Compiler::ADDME(u32 rd, u32 ra, u32 oe, bool rc) { @@ -2966,8 +2873,8 @@ void Compiler::ADDME(u32 rd, u32 ra, u32 oe, bool rc) { if (oe) { // TODO: Implement this + CompilationError("ADDMEO"); } - //InterpreterCall("ADDME", &PPUInterpreter::ADDME, rd, ra, oe, rc); } void Compiler::MULLW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { @@ -2982,14 +2889,15 @@ void Compiler::MULLW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); } - // TODO implement oe - //InterpreterCall("MULLW", &PPUInterpreter::MULLW, rd, ra, rb, oe, rc); + if (oe) { + // TODO implement oe + CompilationError("MULLWO"); + } } void Compiler::DCBTST(u32 ra, u32 rb, u32 th) { // TODO: Implement this m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); - //InterpreterCall("DCBTST", &PPUInterpreter::DCBTST, ra, rb, th); } void Compiler::STBUX(u32 rs, u32 ra, u32 rb) { @@ -2999,7 +2907,6 @@ void Compiler::STBUX(u32 rs, u32 ra, u32 rb) { WriteMemory(addr_i64, GetGpr(rs, 8)); SetGpr(ra, addr_i64); - //InterpreterCall("STBUX", &PPUInterpreter::STBUX, rs, ra, rb); } void Compiler::ADD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { @@ -3014,14 +2921,13 @@ void Compiler::ADD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { if (oe) { // TODO: Implement this + CompilationError("ADDO"); } - //InterpreterCall("ADD", &PPUInterpreter::ADD, rd, ra, rb, oe, rc); } void Compiler::DCBT(u32 ra, u32 rb, u32 th) { // TODO: Implement this using prefetch m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); - //InterpreterCall("DCBT", &PPUInterpreter::DCBT, ra, rb, th); } void Compiler::LHZX(u32 rd, u32 ra, u32 rb) { @@ -3034,7 +2940,6 @@ void Compiler::LHZX(u32 rd, u32 ra, u32 rb) { auto mem_i16 = ReadMemory(addr_i64, 16); auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); - //InterpreterCall("LHZX", &PPUInterpreter::LHZX, rd, ra, rb); } void Compiler::EQV(u32 ra, u32 rs, u32 rb, bool rc) { @@ -3047,8 +2952,6 @@ void Compiler::EQV(u32 ra, u32 rs, u32 rb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - - //InterpreterCall("EQV", &PPUInterpreter::EQV, ra, rs, rb, rc); } void Compiler::ECIWX(u32 rd, u32 ra, u32 rb) { @@ -3064,7 +2967,6 @@ void Compiler::LHZUX(u32 rd, u32 ra, u32 rb) { auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); SetGpr(ra, addr_i64); - //InterpreterCall("LHZUX", &PPUInterpreter::LHZUX, rd, ra, rb); } void Compiler::XOR(u32 ra, u32 rs, u32 rb, bool rc) { @@ -3076,7 +2978,6 @@ void Compiler::XOR(u32 ra, u32 rs, u32 rb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("XOR", &PPUInterpreter::XOR, ra, rs, rb, rc); } void Compiler::MFSPR(u32 rd, u32 spr) { @@ -3109,7 +3010,6 @@ void Compiler::MFSPR(u32 rd, u32 spr) { } SetGpr(rd, rd_i64); - //InterpreterCall("MFSPR", &PPUInterpreter::MFSPR, rd, spr); } void Compiler::LWAX(u32 rd, u32 ra, u32 rb) { @@ -3122,7 +3022,6 @@ void Compiler::LWAX(u32 rd, u32 ra, u32 rb) { auto mem_i32 = ReadMemory(addr_i64, 32); auto mem_i64 = m_ir_builder->CreateSExt(mem_i32, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); - //InterpreterCall("LWAX", &PPUInterpreter::LWAX, rd, ra, rb); } void Compiler::DST(u32 ra, u32 rb, u32 strm, u32 t) { @@ -3140,12 +3039,10 @@ void Compiler::LHAX(u32 rd, u32 ra, u32 rb) { auto mem_i16 = ReadMemory(addr_i64, 16); auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); - //InterpreterCall("LHAX", &PPUInterpreter::LHAX, rd, ra, rb); } void Compiler::LVXL(u32 vd, u32 ra, u32 rb) { LVX(vd, ra, rb); - //InterpreterCall("LVXL", &PPUInterpreter::LVXL, vd, ra, rb); } void Compiler::MFTB(u32 rd, u32 spr) { @@ -3168,7 +3065,6 @@ void Compiler::LWAUX(u32 rd, u32 ra, u32 rb) { auto mem_i64 = m_ir_builder->CreateSExt(mem_i32, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); SetGpr(ra, addr_i64); - //InterpreterCall("LWAUX", &PPUInterpreter::LWAUX, rd, ra, rb); } void Compiler::DSTST(u32 ra, u32 rb, u32 strm, u32 t) { @@ -3185,7 +3081,6 @@ void Compiler::LHAUX(u32 rd, u32 ra, u32 rb) { auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); SetGpr(ra, addr_i64); - //InterpreterCall("LHAUX", &PPUInterpreter::LHAUX, rd, ra, rb); } void Compiler::STHX(u32 rs, u32 ra, u32 rb) { @@ -3196,7 +3091,6 @@ void Compiler::STHX(u32 rs, u32 ra, u32 rb) { } WriteMemory(addr_i64, GetGpr(rs, 16)); - //InterpreterCall("STHX", &PPUInterpreter::STHX, rs, ra, rb); } void Compiler::ORC(u32 ra, u32 rs, u32 rb, bool rc) { @@ -3209,7 +3103,6 @@ void Compiler::ORC(u32 ra, u32 rs, u32 rb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("ORC", &PPUInterpreter::ORC, ra, rs, rb, rc); } void Compiler::ECOWX(u32 rs, u32 ra, u32 rb) { @@ -3223,7 +3116,6 @@ void Compiler::STHUX(u32 rs, u32 ra, u32 rb) { WriteMemory(addr_i64, GetGpr(rs, 16)); SetGpr(ra, addr_i64); - //InterpreterCall("STHUX", &PPUInterpreter::STHUX, rs, ra, rb); } void Compiler::OR(u32 ra, u32 rs, u32 rb, bool rc) { @@ -3235,7 +3127,6 @@ void Compiler::OR(u32 ra, u32 rs, u32 rb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("OR", &PPUInterpreter::OR, ra, rs, rb, rc); } void Compiler::DIVDU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { @@ -3248,9 +3139,12 @@ void Compiler::DIVDU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - // TODO implement oe + if (oe) { + // TODO implement oe + CompilationError("DIVDUO"); + } + // TODO make sure an exception does not occur on divide by 0 and overflow - //InterpreterCall("DIVDU", &PPUInterpreter::DIVDU, rd, ra, rb, oe, rc); } void Compiler::DIVWU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { @@ -3264,9 +3158,12 @@ void Compiler::DIVWU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - // TODO implement oe + if (oe) { + // TODO implement oe + CompilationError("DIVWUO"); + } + // TODO make sure an exception does not occur on divide by 0 and overflow - //InterpreterCall("DIVWU", &PPUInterpreter::DIVWU, rd, ra, rb, oe, rc); } void Compiler::MTSPR(u32 spr, u32 rs) { @@ -3291,7 +3188,6 @@ void Compiler::MTSPR(u32 spr, u32 rs) { break; } - //InterpreterCall("MTSPR", &PPUInterpreter::MTSPR, spr, rs); } void Compiler::NAND(u32 ra, u32 rs, u32 rb, bool rc) { @@ -3304,12 +3200,10 @@ void Compiler::NAND(u32 ra, u32 rs, u32 rb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("NAND", &PPUInterpreter::NAND, ra, rs, rb, rc); } void Compiler::STVXL(u32 vs, u32 ra, u32 rb) { STVX(vs, ra, rb); - //InterpreterCall("STVXL", &PPUInterpreter::STVXL, vs, ra, rb); } void Compiler::DIVD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { @@ -3322,9 +3216,12 @@ void Compiler::DIVD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - // TODO implement oe + if (oe) { + // TODO implement oe + CompilationError("DIVDO"); + } + // TODO make sure an exception does not occur on divide by 0 and overflow - //InterpreterCall("DIVD", &PPUInterpreter::DIVD, rd, ra, rb, oe, rc); } void Compiler::DIVW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { @@ -3338,9 +3235,12 @@ void Compiler::DIVW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - // TODO implement oe + if (oe) { + // TODO implement oe + CompilationError("DIVWO"); + } + // TODO make sure an exception does not occur on divide by 0 and overflow - //InterpreterCall("DIVW", &PPUInterpreter::DIVW, rd, ra, rb, oe, rc); } void Compiler::LVLX(u32 vd, u32 ra, u32 rb) { @@ -3357,7 +3257,6 @@ void Compiler::LVLX(u32 vd, u32 ra, u32 rb) { auto mem_i128 = ReadMemory(addr_i64, 128, 16); mem_i128 = m_ir_builder->CreateShl(mem_i128, eb_i128); SetVr(vd, mem_i128); - //InterpreterCall("LVLX", &PPUInterpreter::LVLX, vd, ra, rb); } void Compiler::LDBRX(u32 rd, u32 ra, u32 rb) { @@ -3369,7 +3268,6 @@ void Compiler::LDBRX(u32 rd, u32 ra, u32 rb) { auto mem_i64 = ReadMemory(addr_i64, 64, 0, false); SetGpr(rd, mem_i64); - //InterpreterCall("LDBRX", &PPUInterpreter::LDBRX, rd, ra, rb); } void Compiler::LSWX(u32 rd, u32 ra, u32 rb) { @@ -3386,7 +3284,6 @@ void Compiler::LWBRX(u32 rd, u32 ra, u32 rb) { auto mem_i32 = ReadMemory(addr_i64, 32, 0, false); auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); - //InterpreterCall("LWBRX", &PPUInterpreter::LWBRX, rd, ra, rb); } void Compiler::LFSX(u32 frd, u32 ra, u32 rb) { @@ -3398,7 +3295,6 @@ void Compiler::LFSX(u32 frd, u32 ra, u32 rb) { auto mem_i32 = ReadMemory(addr_i64, 32); SetFpr(frd, mem_i32); - //InterpreterCall("LFSX", &PPUInterpreter::LFSX, frd, ra, rb); } void Compiler::SRW(u32 ra, u32 rs, u32 rb, bool rc) { @@ -3413,8 +3309,6 @@ void Compiler::SRW(u32 ra, u32 rs, u32 rb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - - //InterpreterCall("SRW", &PPUInterpreter::SRW, ra, rs, rb, rc); } void Compiler::SRD(u32 ra, u32 rs, u32 rb, bool rc) { @@ -3430,8 +3324,6 @@ void Compiler::SRD(u32 ra, u32 rs, u32 rb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } - - //InterpreterCall("SRD", &PPUInterpreter::SRD, ra, rs, rb, rc); } void Compiler::LVRX(u32 vd, u32 ra, u32 rb) { @@ -3452,8 +3344,6 @@ void Compiler::LVRX(u32 vd, u32 ra, u32 rb) { auto cmp_i128 = m_ir_builder->CreateSExt(cmp_i1, m_ir_builder->getIntNTy(128)); mem_i128 = m_ir_builder->CreateAnd(mem_i128, cmp_i128); SetVr(vd, mem_i128); - - //InterpreterCall("LVRX", &PPUInterpreter::LVRX, vd, ra, rb); } void Compiler::LSWI(u32 rd, u32 ra, u32 nb) { @@ -3474,8 +3364,6 @@ void Compiler::LSWI(u32 rd, u32 ra, u32 nb) { SetGpr(rd, val_i64); rd = (rd + 1) % 32; } - - //InterpreterCall("LSWI", &PPUInterpreter::LSWI, rd, ra, nb); } void Compiler::LFSUX(u32 frd, u32 ra, u32 rb) { @@ -3485,12 +3373,10 @@ void Compiler::LFSUX(u32 frd, u32 ra, u32 rb) { auto mem_i32 = ReadMemory(addr_i64, 32); SetFpr(frd, mem_i32); SetGpr(ra, addr_i64); - //InterpreterCall("LFSUX", &PPUInterpreter::LFSUX, frd, ra, rb); } void Compiler::SYNC(u32 l) { m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence)); - //InterpreterCall("SYNC", &PPUInterpreter::SYNC, l); } void Compiler::LFDX(u32 frd, u32 ra, u32 rb) { @@ -3502,7 +3388,6 @@ void Compiler::LFDX(u32 frd, u32 ra, u32 rb) { auto mem_i64 = ReadMemory(addr_i64, 64); SetFpr(frd, mem_i64); - //InterpreterCall("LFDX", &PPUInterpreter::LFDX, frd, ra, rb); } void Compiler::LFDUX(u32 frd, u32 ra, u32 rb) { @@ -3512,7 +3397,6 @@ void Compiler::LFDUX(u32 frd, u32 ra, u32 rb) { auto mem_i64 = ReadMemory(addr_i64, 64); SetFpr(frd, mem_i64); SetGpr(ra, addr_i64); - //InterpreterCall("LFDUX", &PPUInterpreter::LFDUX, frd, ra, rb); } void Compiler::STVLX(u32 vs, u32 ra, u32 rb) { @@ -3531,7 +3415,6 @@ void Compiler::STWBRX(u32 rs, u32 ra, u32 rb) { } WriteMemory(addr_i64, GetGpr(rs, 32), 0, false); - //InterpreterCall("STWBRX", &PPUInterpreter::STWBRX, rs, ra, rb); } void Compiler::STFSX(u32 frs, u32 ra, u32 rb) { @@ -3543,7 +3426,6 @@ void Compiler::STFSX(u32 frs, u32 ra, u32 rb) { auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); WriteMemory(addr_i64, frs_i32); - //InterpreterCall("STFSX", &PPUInterpreter::STFSX, frs, ra, rb); } void Compiler::STVRX(u32 vs, u32 ra, u32 rb) { @@ -3558,7 +3440,6 @@ void Compiler::STFSUX(u32 frs, u32 ra, u32 rb) { auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); WriteMemory(addr_i64, frs_i32); SetGpr(ra, addr_i64); - //InterpreterCall("STFSUX", &PPUInterpreter::STFSUX, frs, ra, rb); } void Compiler::STSWI(u32 rd, u32 ra, u32 nb) { @@ -3592,8 +3473,6 @@ void Compiler::STSWI(u32 rd, u32 ra, u32 nb) { } } } - - //InterpreterCall("STSWI", &PPUInterpreter::STSWI, rd, ra, nb); } void Compiler::STFDX(u32 frs, u32 ra, u32 rb) { @@ -3605,7 +3484,6 @@ void Compiler::STFDX(u32 frs, u32 ra, u32 rb) { auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); WriteMemory(addr_i64, frs_i64); - //InterpreterCall("STFDX", &PPUInterpreter::STFDX, frs, ra, rb); } void Compiler::STFDUX(u32 frs, u32 ra, u32 rb) { @@ -3616,12 +3494,10 @@ void Compiler::STFDUX(u32 frs, u32 ra, u32 rb) { auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); WriteMemory(addr_i64, frs_i64); SetGpr(ra, addr_i64); - //InterpreterCall("STFDUX", &PPUInterpreter::STFDUX, frs, ra, rb); } void Compiler::LVLXL(u32 vd, u32 ra, u32 rb) { LVLX(vd, ra, rb); - //InterpreterCall("LVLXL", &PPUInterpreter::LVLXL, vd, ra, rb); } void Compiler::LHBRX(u32 rd, u32 ra, u32 rb) { @@ -3634,7 +3510,6 @@ void Compiler::LHBRX(u32 rd, u32 ra, u32 rb) { auto mem_i16 = ReadMemory(addr_i64, 16, 0, false); auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); - //InterpreterCall("LHBRX", &PPUInterpreter::LHBRX, rd, ra, rb); } void Compiler::SRAW(u32 ra, u32 rs, u32 rb, bool rc) { @@ -3657,8 +3532,6 @@ void Compiler::SRAW(u32 ra, u32 rs, u32 rb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); } - - //InterpreterCall("SRAW", &PPUInterpreter::SRAW, ra, rs, rb, rc); } void Compiler::SRAD(u32 ra, u32 rs, u32 rb, bool rc) { @@ -3682,13 +3555,10 @@ void Compiler::SRAD(u32 ra, u32 rs, u32 rb, bool rc) { if (rc) { SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); } - - //InterpreterCall("SRAD", &PPUInterpreter::SRAD, ra, rs, rb, rc); } void Compiler::LVRXL(u32 vd, u32 ra, u32 rb) { LVRX(vd, ra, rb); - //InterpreterCall("LVRXL", &PPUInterpreter::LVRXL, vd, ra, rb); } void Compiler::DSS(u32 strm, u32 a) { @@ -3713,8 +3583,6 @@ void Compiler::SRAWI(u32 ra, u32 rs, u32 sh, bool rc) { if (rc) { SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); } - - //InterpreterCall("SRAWI", &PPUInterpreter::SRAWI, ra, rs, sh, rc); } void Compiler::SRADI1(u32 ra, u32 rs, u32 sh, bool rc) { @@ -3735,23 +3603,18 @@ void Compiler::SRADI1(u32 ra, u32 rs, u32 sh, bool rc) { if (rc) { SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); } - - //InterpreterCall("SRADI1", &PPUInterpreter::SRADI1, ra, rs, sh, rc); } void Compiler::SRADI2(u32 ra, u32 rs, u32 sh, bool rc) { SRADI1(ra, rs, sh, rc); - //InterpreterCall("SRADI2", &PPUInterpreter::SRADI2, ra, rs, sh, rc); } void Compiler::EIEIO() { m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence)); - //InterpreterCall("EIEIO", &PPUInterpreter::EIEIO); } void Compiler::STVLXL(u32 vs, u32 ra, u32 rb) { STVLX(vs, ra, rb); - //InterpreterCall("STVLXL", &PPUInterpreter::STVLXL, vs, ra, rb); } void Compiler::STHBRX(u32 rs, u32 ra, u32 rb) { @@ -3762,7 +3625,6 @@ void Compiler::STHBRX(u32 rs, u32 ra, u32 rb) { } WriteMemory(addr_i64, GetGpr(rs, 16), 0, false); - //InterpreterCall("STHBRX", &PPUInterpreter::STHBRX, rs, ra, rb); } void Compiler::EXTSH(u32 ra, u32 rs, bool rc) { @@ -3773,12 +3635,10 @@ void Compiler::EXTSH(u32 ra, u32 rs, bool rc) { if (rc) { SetCrFieldSignedCmp(0, rs_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("EXTSH", &PPUInterpreter::EXTSH, ra, rs, rc); } void Compiler::STVRXL(u32 vs, u32 ra, u32 rb) { STVRX(vs, ra, rb); - //InterpreterCall("STVRXL", &PPUInterpreter::STVRXL, vs, ra, rb); } void Compiler::EXTSB(u32 ra, u32 rs, bool rc) { @@ -3789,7 +3649,6 @@ void Compiler::EXTSB(u32 ra, u32 rs, bool rc) { if (rc) { SetCrFieldSignedCmp(0, rs_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("EXTSB", &PPUInterpreter::EXTSB, ra, rs, rc); } void Compiler::STFIWX(u32 frs, u32 ra, u32 rb) { @@ -3802,7 +3661,6 @@ void Compiler::STFIWX(u32 frs, u32 ra, u32 rb) { auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); auto frs_i32 = m_ir_builder->CreateTrunc(frs_i64, m_ir_builder->getInt32Ty()); WriteMemory(addr_i64, frs_i32); - //InterpreterCall("STFIWX", &PPUInterpreter::STFIWX, frs, ra, rb); } void Compiler::EXTSW(u32 ra, u32 rs, bool rc) { @@ -3813,7 +3671,6 @@ void Compiler::EXTSW(u32 ra, u32 rs, bool rc) { if (rc) { SetCrFieldSignedCmp(0, rs_i64, m_ir_builder->getInt64(0)); } - //InterpreterCall("EXTSW", &PPUInterpreter::EXTSW, ra, rs, rc); } void Compiler::ICBI(u32 ra, u32 rs) { @@ -3835,7 +3692,6 @@ void Compiler::DCBZ(u32 ra, u32 rb) { std::vector types = {(Type *)m_ir_builder->getInt8PtrTy(), (Type *)m_ir_builder->getInt32Ty()}; m_ir_builder->CreateCall5(Intrinsic::getDeclaration(m_module, Intrinsic::memset, types), addr_i8_ptr, m_ir_builder->getInt8(0), m_ir_builder->getInt32(128), m_ir_builder->getInt32(128), m_ir_builder->getInt1(true)); - //InterpreterCall("DCBZ", &PPUInterpreter::DCBZ, ra, rb);L } void Compiler::LWZ(u32 rd, u32 ra, s32 d) { @@ -3848,7 +3704,6 @@ void Compiler::LWZ(u32 rd, u32 ra, s32 d) { auto mem_i32 = ReadMemory(addr_i64, 32); auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); - //InterpreterCall("LWZ", &PPUInterpreter::LWZ, rd, ra, d); } void Compiler::LWZU(u32 rd, u32 ra, s32 d) { @@ -3860,7 +3715,6 @@ void Compiler::LWZU(u32 rd, u32 ra, s32 d) { auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); SetGpr(ra, addr_i64); - //InterpreterCall("LWZU", &PPUInterpreter::LWZU, rd, ra, d); } void Compiler::LBZ(u32 rd, u32 ra, s32 d) { @@ -3873,7 +3727,6 @@ void Compiler::LBZ(u32 rd, u32 ra, s32 d) { auto mem_i8 = ReadMemory(addr_i64, 8); auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); - //InterpreterCall("LBZ", &PPUInterpreter::LBZ, rd, ra, d); } void Compiler::LBZU(u32 rd, u32 ra, s32 d) { @@ -3885,7 +3738,6 @@ void Compiler::LBZU(u32 rd, u32 ra, s32 d) { auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); SetGpr(ra, addr_i64); - //InterpreterCall("LBZU", &PPUInterpreter::LBZU, rd, ra, d); } void Compiler::STW(u32 rs, u32 ra, s32 d) { @@ -3896,7 +3748,6 @@ void Compiler::STW(u32 rs, u32 ra, s32 d) { } WriteMemory(addr_i64, GetGpr(rs, 32)); - //InterpreterCall("STW", &PPUInterpreter::STW, rs, ra, d); } void Compiler::STWU(u32 rs, u32 ra, s32 d) { @@ -3906,7 +3757,6 @@ void Compiler::STWU(u32 rs, u32 ra, s32 d) { WriteMemory(addr_i64, GetGpr(rs, 32)); SetGpr(ra, addr_i64); - //InterpreterCall("STWU", &PPUInterpreter::STWU, rs, ra, d); } void Compiler::STB(u32 rs, u32 ra, s32 d) { @@ -3917,7 +3767,6 @@ void Compiler::STB(u32 rs, u32 ra, s32 d) { } WriteMemory(addr_i64, GetGpr(rs, 8)); - //InterpreterCall("STB", &PPUInterpreter::STB, rs, ra, d); } void Compiler::STBU(u32 rs, u32 ra, s32 d) { @@ -3927,7 +3776,6 @@ void Compiler::STBU(u32 rs, u32 ra, s32 d) { WriteMemory(addr_i64, GetGpr(rs, 8)); SetGpr(ra, addr_i64); - //InterpreterCall("STBU", &PPUInterpreter::STBU, rs, ra, d); } void Compiler::LHZ(u32 rd, u32 ra, s32 d) { @@ -3940,7 +3788,6 @@ void Compiler::LHZ(u32 rd, u32 ra, s32 d) { auto mem_i16 = ReadMemory(addr_i64, 16); auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); - //InterpreterCall("LHZ", &PPUInterpreter::LHZ, rd, ra, d); } void Compiler::LHZU(u32 rd, u32 ra, s32 d) { @@ -3952,7 +3799,6 @@ void Compiler::LHZU(u32 rd, u32 ra, s32 d) { auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); SetGpr(ra, addr_i64); - //InterpreterCall("LHZU", &PPUInterpreter::LHZU, rd, ra, d); } void Compiler::LHA(u32 rd, u32 ra, s32 d) { @@ -3965,7 +3811,6 @@ void Compiler::LHA(u32 rd, u32 ra, s32 d) { auto mem_i16 = ReadMemory(addr_i64, 16); auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); - //InterpreterCall("LHA", &PPUInterpreter::LHA, rd, ra, d); } void Compiler::LHAU(u32 rd, u32 ra, s32 d) { @@ -3977,7 +3822,6 @@ void Compiler::LHAU(u32 rd, u32 ra, s32 d) { auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); SetGpr(ra, addr_i64); - //InterpreterCall("LHAU", &PPUInterpreter::LHAU, rd, ra, d); } void Compiler::STH(u32 rs, u32 ra, s32 d) { @@ -3988,7 +3832,6 @@ void Compiler::STH(u32 rs, u32 ra, s32 d) { } WriteMemory(addr_i64, GetGpr(rs, 16)); - //InterpreterCall("STH", &PPUInterpreter::STH, rs, ra, d); } void Compiler::STHU(u32 rs, u32 ra, s32 d) { @@ -3998,7 +3841,6 @@ void Compiler::STHU(u32 rs, u32 ra, s32 d) { WriteMemory(addr_i64, GetGpr(rs, 16)); SetGpr(ra, addr_i64); - //InterpreterCall("STHU", &PPUInterpreter::STHU, rs, ra, d); } void Compiler::LMW(u32 rd, u32 ra, s32 d) { @@ -4013,8 +3855,6 @@ void Compiler::LMW(u32 rd, u32 ra, s32 d) { SetGpr(i, val_i64); addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4)); } - - //InterpreterCall("LMW", &PPUInterpreter::LMW, rd, ra, d); } void Compiler::STMW(u32 rs, u32 ra, s32 d) { @@ -4028,8 +3868,6 @@ void Compiler::STMW(u32 rs, u32 ra, s32 d) { WriteMemory(addr_i64, val_i32); addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4)); } - - //InterpreterCall("STMW", &PPUInterpreter::STMW, rs, ra, d); } void Compiler::LFS(u32 frd, u32 ra, s32 d) { @@ -4041,7 +3879,6 @@ void Compiler::LFS(u32 frd, u32 ra, s32 d) { auto mem_i32 = ReadMemory(addr_i64, 32); SetFpr(frd, mem_i32); - //InterpreterCall("LFS", &PPUInterpreter::LFS, frd, ra, d); } void Compiler::LFSU(u32 frd, u32 ra, s32 ds) { @@ -4051,7 +3888,6 @@ void Compiler::LFSU(u32 frd, u32 ra, s32 ds) { auto mem_i32 = ReadMemory(addr_i64, 32); SetFpr(frd, mem_i32); SetGpr(ra, addr_i64); - //InterpreterCall("LFSU", &PPUInterpreter::LFSU, frd, ra, ds); } void Compiler::LFD(u32 frd, u32 ra, s32 d) { @@ -4063,7 +3899,6 @@ void Compiler::LFD(u32 frd, u32 ra, s32 d) { auto mem_i64 = ReadMemory(addr_i64, 64); SetFpr(frd, mem_i64); - //InterpreterCall("LFD", &PPUInterpreter::LFD, frd, ra, d); } void Compiler::LFDU(u32 frd, u32 ra, s32 ds) { @@ -4074,7 +3909,6 @@ void Compiler::LFDU(u32 frd, u32 ra, s32 ds) { auto mem_i64 = ReadMemory(addr_i64, 64); SetFpr(frd, mem_i64); SetGpr(ra, addr_i64); - //InterpreterCall("LFDU", &PPUInterpreter::LFDU, frd, ra, ds); } void Compiler::STFS(u32 frs, u32 ra, s32 d) { @@ -4086,7 +3920,6 @@ void Compiler::STFS(u32 frs, u32 ra, s32 d) { auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); WriteMemory(addr_i64, frs_i32); - //InterpreterCall("STFS", &PPUInterpreter::STFS, frs, ra, d); } void Compiler::STFSU(u32 frs, u32 ra, s32 d) { @@ -4097,7 +3930,6 @@ void Compiler::STFSU(u32 frs, u32 ra, s32 d) { auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); WriteMemory(addr_i64, frs_i32); SetGpr(ra, addr_i64); - //InterpreterCall("STFSU", &PPUInterpreter::STFSU, frs, ra, d); } void Compiler::STFD(u32 frs, u32 ra, s32 d) { @@ -4109,7 +3941,6 @@ void Compiler::STFD(u32 frs, u32 ra, s32 d) { auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); WriteMemory(addr_i64, frs_i64); - //InterpreterCall("STFD", &PPUInterpreter::STFD, frs, ra, d); } void Compiler::STFDU(u32 frs, u32 ra, s32 d) { @@ -4120,7 +3951,6 @@ void Compiler::STFDU(u32 frs, u32 ra, s32 d) { auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); WriteMemory(addr_i64, frs_i64); SetGpr(ra, addr_i64); - //InterpreterCall("STFDU", &PPUInterpreter::STFDU, frs, ra, d); } void Compiler::LD(u32 rd, u32 ra, s32 ds) { @@ -4132,7 +3962,6 @@ void Compiler::LD(u32 rd, u32 ra, s32 ds) { auto mem_i64 = ReadMemory(addr_i64, 64); SetGpr(rd, mem_i64); - //InterpreterCall("LD", &PPUInterpreter::LD, rd, ra, ds); } void Compiler::LDU(u32 rd, u32 ra, s32 ds) { @@ -4143,7 +3972,6 @@ void Compiler::LDU(u32 rd, u32 ra, s32 ds) { auto mem_i64 = ReadMemory(addr_i64, 64); SetGpr(rd, mem_i64); SetGpr(ra, addr_i64); - //InterpreterCall("LDU", &PPUInterpreter::LDU, rd, ra, ds); } void Compiler::LWA(u32 rd, u32 ra, s32 ds) { @@ -4156,7 +3984,6 @@ void Compiler::LWA(u32 rd, u32 ra, s32 ds) { auto mem_i32 = ReadMemory(addr_i64, 32); auto mem_i64 = m_ir_builder->CreateSExt(mem_i32, m_ir_builder->getInt64Ty()); SetGpr(rd, mem_i64); - //InterpreterCall("LWA", &PPUInterpreter::LWA, rd, ra, ds); } void Compiler::FDIVS(u32 frd, u32 fra, u32 frb, bool rc) { @@ -4167,8 +3994,12 @@ void Compiler::FDIVS(u32 frd, u32 fra, u32 frb, bool rc) { res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FDIVS."); + } + // TODO: Set flags - //InterpreterCall("FDIVS", &PPUInterpreter::FDIVS, frd, fra, frb, rc); } void Compiler::FSUBS(u32 frd, u32 fra, u32 frb, bool rc) { @@ -4179,8 +4010,12 @@ void Compiler::FSUBS(u32 frd, u32 fra, u32 frb, bool rc) { res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FSUBS."); + } + // TODO: Set flags - //InterpreterCall("FSUBS", &PPUInterpreter::FSUBS, frd, fra, frb, rc); } void Compiler::FADDS(u32 frd, u32 fra, u32 frb, bool rc) { @@ -4191,8 +4026,12 @@ void Compiler::FADDS(u32 frd, u32 fra, u32 frb, bool rc) { res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FADDS."); + } + // TODO: Set flags - //InterpreterCall("FADDS", &PPUInterpreter::FADDS, frd, fra, frb, rc); } void Compiler::FSQRTS(u32 frd, u32 frb, bool rc) { @@ -4201,9 +4040,13 @@ void Compiler::FSQRTS(u32 frd, u32 frb, bool rc) { auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); SetFpr(frd, res_f64); - + + if (rc) { + // TODO: Implement this + CompilationError("FSQRTS."); + } + // TODO: Set flags - //InterpreterCall("FSQRTS", &PPUInterpreter::FSQRTS, frd, frb, rc); } void Compiler::FRES(u32 frd, u32 frb, bool rc) { @@ -4211,8 +4054,12 @@ void Compiler::FRES(u32 frd, u32 frb, bool rc) { auto res_f64 = m_ir_builder->CreateFDiv(ConstantFP::get(m_ir_builder->getDoubleTy(), 1.0), rb_f64); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FRES."); + } + // TODO: Set flags - //InterpreterCall("FRES", &PPUInterpreter::FRES, frd, frb, rc); } void Compiler::FMULS(u32 frd, u32 fra, u32 frc, bool rc) { @@ -4223,8 +4070,12 @@ void Compiler::FMULS(u32 frd, u32 fra, u32 frc, bool rc) { res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FMULS."); + } + // TODO: Set flags - //InterpreterCall("FMULS", &PPUInterpreter::FMULS, frd, fra, frc, rc); } void Compiler::FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { @@ -4236,8 +4087,12 @@ void Compiler::FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FMADDS."); + } + // TODO: Set flags - //InterpreterCall("FMADDS", &PPUInterpreter::FMADDS, frd, fra, frc, frb, rc); } void Compiler::FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { @@ -4250,8 +4105,12 @@ void Compiler::FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FMSUBS."); + } + // TODO: Set flags - //InterpreterCall("FMSUBS", &PPUInterpreter::FMSUBS, frd, fra, frc, frb, rc); } void Compiler::FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { @@ -4265,8 +4124,12 @@ void Compiler::FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FNMSUBS."); + } + // TODO: Set flags - //InterpreterCall("FNMSUBS", &PPUInterpreter::FNMSUBS, frd, fra, frc, frb, rc); } void Compiler::FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { @@ -4279,8 +4142,12 @@ void Compiler::FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FNMADDS."); + } + // TODO: Set flags - //InterpreterCall("FNMADDS", &PPUInterpreter::FNMADDS, frd, fra, frc, frb, rc); } void Compiler::STD(u32 rs, u32 ra, s32 d) { @@ -4291,7 +4158,6 @@ void Compiler::STD(u32 rs, u32 ra, s32 d) { } WriteMemory(addr_i64, GetGpr(rs, 64)); - //InterpreterCall("STD", &PPUInterpreter::STD, rs, ra, d); } void Compiler::STDU(u32 rs, u32 ra, s32 ds) { @@ -4301,7 +4167,6 @@ void Compiler::STDU(u32 rs, u32 ra, s32 ds) { WriteMemory(addr_i64, GetGpr(rs, 64)); SetGpr(ra, addr_i64); - //InterpreterCall("STDU", &PPUInterpreter::STDU, rs, ra, ds); } void Compiler::MTFSB1(u32 crbd, bool rc) { @@ -4341,8 +4206,12 @@ void Compiler::FCTIW(u32 frd, u32 frb, bool rc) { auto res_i32 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt32Ty()); SetFpr(frd, res_i32); + if (rc) { + // TODO: Implement this + CompilationError("FCTIW."); + } + // TODO: Set flags / Handle NaN / Implement Saturation - //InterpreterCall("FCTIW", &PPUInterpreter::FCTIW, frd, frb, rc); } void Compiler::FCTIWZ(u32 frd, u32 frb, bool rc) { @@ -4355,8 +4224,12 @@ void Compiler::FDIV(u32 frd, u32 fra, u32 frb, bool rc) { auto res_f64 = m_ir_builder->CreateFDiv(ra_f64, rb_f64); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FDIV."); + } + // TODO: Set flags - //InterpreterCall("FDIV", &PPUInterpreter::FDIV, frd, fra, frb, rc); } void Compiler::FSUB(u32 frd, u32 fra, u32 frb, bool rc) { @@ -4365,8 +4238,12 @@ void Compiler::FSUB(u32 frd, u32 fra, u32 frb, bool rc) { auto res_f64 = m_ir_builder->CreateFSub(ra_f64, rb_f64); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FSUB."); + } + // TODO: Set flags - //InterpreterCall("FSUB", &PPUInterpreter::FSUB, frd, fra, frb, rc); } void Compiler::FADD(u32 frd, u32 fra, u32 frb, bool rc) { @@ -4375,8 +4252,12 @@ void Compiler::FADD(u32 frd, u32 fra, u32 frb, bool rc) { auto res_f64 = m_ir_builder->CreateFAdd(ra_f64, rb_f64); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FADD."); + } + // TODO: Set flags - //InterpreterCall("FADD", &PPUInterpreter::FADD, frd, fra, frb, rc); } void Compiler::FSQRT(u32 frd, u32 frb, bool rc) { @@ -4384,8 +4265,12 @@ void Compiler::FSQRT(u32 frd, u32 frb, bool rc) { auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FSQRT."); + } + // TODO: Set flags - //InterpreterCall("FSQRT", &PPUInterpreter::FSQRT, frd, frb, rc); } void Compiler::FSEL(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { @@ -4398,8 +4283,12 @@ void Compiler::FMUL(u32 frd, u32 fra, u32 frc, bool rc) { auto res_f64 = m_ir_builder->CreateFMul(ra_f64, rc_f64); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FMUL."); + } + // TODO: Set flags - //InterpreterCall("FMUL", &PPUInterpreter::FMUL, frd, fra, frc, rc); } void Compiler::FRSQRTE(u32 frd, u32 frb, bool rc) { @@ -4407,6 +4296,11 @@ void Compiler::FRSQRTE(u32 frd, u32 frb, bool rc) { auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64); res_f64 = m_ir_builder->CreateFDiv(ConstantFP::get(m_ir_builder->getDoubleTy(), 1.0), res_f64); SetFpr(frd, res_f64); + + if (rc) { + // TODO: Implement this + CompilationError("FRSQRTE."); + } } void Compiler::FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { @@ -4417,8 +4311,12 @@ void Compiler::FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { auto res_f64 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FMSUB."); + } + // TODO: Set flags - //InterpreterCall("FMSUB", &PPUInterpreter::FMSUB, frd, fra, frc, frb, rc); } void Compiler::FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { @@ -4428,8 +4326,12 @@ void Compiler::FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { auto res_f64 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FMADD."); + } + // TODO: Set flags - //InterpreterCall("FMADD", &PPUInterpreter::FMADD, frd, fra, frc, frb, rc); } void Compiler::FNMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { @@ -4440,8 +4342,12 @@ void Compiler::FNMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FNMSUB."); + } + // TODO: Set flags - //InterpreterCall("FNMSUB", &PPUInterpreter::FNMSUB, frd, fra, frc, frb, rc); } void Compiler::FNMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { @@ -4453,8 +4359,12 @@ void Compiler::FNMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FNMADD."); + } + // TODO: Set flags - //InterpreterCall("FNMADD", &PPUInterpreter::FNMADD, frd, fra, frc, frb, rc); } void Compiler::FCMPO(u32 crfd, u32 fra, u32 frb) { @@ -4466,14 +4376,23 @@ void Compiler::FNEG(u32 frd, u32 frb, bool rc) { rb_f64 = m_ir_builder->CreateFNeg(rb_f64); SetFpr(frd, rb_f64); + if (rc) { + // TODO: Implement this + CompilationError("FNEG."); + } + // TODO: Set flags - //InterpreterCall("FNEG", &PPUInterpreter::FNEG, frd, frb, rc); } void Compiler::FMR(u32 frd, u32 frb, bool rc) { SetFpr(frd, GetFpr(frb)); + + if (rc) { + // TODO: Implement this + CompilationError("FMR."); + } + // TODO: Set flags - //InterpreterCall("FMR", &PPUInterpreter::FMR, frd, frb, rc); } void Compiler::FNABS(u32 frd, u32 frb, bool rc) { @@ -4482,8 +4401,12 @@ void Compiler::FNABS(u32 frd, u32 frb, bool rc) { res_f64 = m_ir_builder->CreateFNeg(res_f64); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FNABS."); + } + // TODO: Set flags - //InterpreterCall("FNABS", &PPUInterpreter::FNABS, frd, frb, rc); } void Compiler::FABS(u32 frd, u32 frb, bool rc) { @@ -4491,8 +4414,12 @@ void Compiler::FABS(u32 frd, u32 frb, bool rc) { auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::fabs, m_ir_builder->getDoubleTy()), rb_f64); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FABS."); + } + // TODO: Set flags - //InterpreterCall("FABS", &PPUInterpreter::FABS, frd, frb, rc); } void Compiler::FCTID(u32 frd, u32 frb, bool rc) { @@ -4500,8 +4427,12 @@ void Compiler::FCTID(u32 frd, u32 frb, bool rc) { auto res_i64 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt64Ty()); SetFpr(frd, res_i64); + if (rc) { + // TODO: Implement this + CompilationError("FCTID."); + } + // TODO: Set flags / Handle NaN / Implement Saturation - //InterpreterCall("FCTID", &PPUInterpreter::FCTID, frd, frb, rc); } void Compiler::FCTIDZ(u32 frd, u32 frb, bool rc) { @@ -4513,8 +4444,12 @@ void Compiler::FCFID(u32 frd, u32 frb, bool rc) { auto res_f64 = m_ir_builder->CreateSIToFP(rb_i64, m_ir_builder->getDoubleTy()); SetFpr(frd, res_f64); + if (rc) { + // TODO: Implement this + CompilationError("FCFID."); + } + // TODO: Set flags - //InterpreterCall("FCFID", &PPUInterpreter::FCFID, frd, frb, rc); } void Compiler::UNK(const u32 code, const u32 opcode, const u32 gcode) { From a9645eda73006326fd271f04dba82becf7765191 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Tue, 25 Nov 2014 02:14:12 +0530 Subject: [PATCH 09/13] Implement some more FP instructions in the PPU LLVM recompiler --- rpcs3/Emu/Cell/PPUInterpreter.h | 19 +--- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 127 +++++++++++++++++----- rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp | 7 +- 3 files changed, 111 insertions(+), 42 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 04fad178c6..01879d4d3a 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -4192,9 +4192,7 @@ private: } } - (u64&)CPU.FPR[frd] = 0xfff8000000000000ull | r; - if(r == 0 && ( (u64&)b & DOUBLE_SIGN )) (u64&)CPU.FPR[frd] |= 0x100000000ull; - + (u64&)CPU.FPR[frd] = r; if(rc) UNK("fctiw."); } void FCTIWZ(u32 frd, u32 frb, bool rc) @@ -4232,10 +4230,7 @@ private: value = (u32)i; } - (u64&)CPU.FPR[frd] = 0xfff8000000000000ull | value; - if (value == 0 && ( (u64&)b & DOUBLE_SIGN )) - (u64&)CPU.FPR[frd] |= 0x100000000ull; - + (u64&)CPU.FPR[frd] = (u64)value; if(rc) UNK("fctiwz."); } void FDIV(u32 frd, u32 fra, u32 frb, bool rc) @@ -4333,7 +4328,7 @@ private: { CPU.SetFPSCRException(FPSCR_ZX); } - CPU.FPR[frd] = static_cast(1.0 / sqrt(CPU.FPR[frb])); + CPU.FPR[frd] = 1.0 / sqrt(CPU.FPR[frb]); if(rc) UNK("frsqrte.");//CPU.UpdateCR1(CPU.FPR[frd]); } void FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) @@ -4458,9 +4453,7 @@ private: } } - (u64&)CPU.FPR[frd] = 0xfff8000000000000ull | r; - if(r == 0 && ( (u64&)b & DOUBLE_SIGN )) (u64&)CPU.FPR[frd] |= 0x100000000ull; - + (u64&)CPU.FPR[frd] = r; if(rc) UNK("fctid."); } void FCTIDZ(u32 frd, u32 frb, bool rc) @@ -4498,9 +4491,7 @@ private: r = (u64)i; } - (u64&)CPU.FPR[frd] = 0xfff8000000000000ull | r; - if(r == 0 && ( (u64&)b & DOUBLE_SIGN )) (u64&)CPU.FPR[frd] |= 0x100000000ull; - + (u64&)CPU.FPR[frd] = r; if(rc) UNK("fctidz."); } void FCFID(u32 frd, u32 frb, bool rc) diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 79f189fe03..91d97036f1 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -3991,8 +3991,7 @@ void Compiler::FDIVS(u32 frd, u32 fra, u32 frb, bool rc) { auto rb_f64 = GetFpr(frb); auto res_f64 = m_ir_builder->CreateFDiv(ra_f64, rb_f64); auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); - SetFpr(frd, res_f64); + SetFpr(frd, res_f32); if (rc) { // TODO: Implement this @@ -4007,8 +4006,7 @@ void Compiler::FSUBS(u32 frd, u32 fra, u32 frb, bool rc) { auto rb_f64 = GetFpr(frb); auto res_f64 = m_ir_builder->CreateFSub(ra_f64, rb_f64); auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); - SetFpr(frd, res_f64); + SetFpr(frd, res_f32); if (rc) { // TODO: Implement this @@ -4023,8 +4021,7 @@ void Compiler::FADDS(u32 frd, u32 fra, u32 frb, bool rc) { auto rb_f64 = GetFpr(frb); auto res_f64 = m_ir_builder->CreateFAdd(ra_f64, rb_f64); auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); - SetFpr(frd, res_f64); + SetFpr(frd, res_f32); if (rc) { // TODO: Implement this @@ -4038,8 +4035,7 @@ void Compiler::FSQRTS(u32 frd, u32 frb, bool rc) { auto rb_f64 = GetFpr(frb); auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64); auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); - SetFpr(frd, res_f64); + SetFpr(frd, res_f32); if (rc) { // TODO: Implement this @@ -4052,7 +4048,8 @@ void Compiler::FSQRTS(u32 frd, u32 frb, bool rc) { void Compiler::FRES(u32 frd, u32 frb, bool rc) { auto rb_f64 = GetFpr(frb); auto res_f64 = m_ir_builder->CreateFDiv(ConstantFP::get(m_ir_builder->getDoubleTy(), 1.0), rb_f64); - SetFpr(frd, res_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); if (rc) { // TODO: Implement this @@ -4067,8 +4064,7 @@ void Compiler::FMULS(u32 frd, u32 fra, u32 frc, bool rc) { auto rc_f64 = GetFpr(frc); auto res_f64 = m_ir_builder->CreateFMul(ra_f64, rc_f64); auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); - SetFpr(frd, res_f64); + SetFpr(frd, res_f32); if (rc) { // TODO: Implement this @@ -4084,8 +4080,7 @@ void Compiler::FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { auto rc_f64 = GetFpr(frc); auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); - SetFpr(frd, res_f64); + SetFpr(frd, res_f32); if (rc) { // TODO: Implement this @@ -4102,8 +4097,7 @@ void Compiler::FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { rb_f64 = m_ir_builder->CreateFNeg(rb_f64); auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); - SetFpr(frd, res_f64); + SetFpr(frd, res_f32); if (rc) { // TODO: Implement this @@ -4121,8 +4115,7 @@ void Compiler::FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); res_f64 = m_ir_builder->CreateFNeg(res_f64); auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); - SetFpr(frd, res_f64); + SetFpr(frd, res_f32); if (rc) { // TODO: Implement this @@ -4139,8 +4132,7 @@ void Compiler::FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); res_f64 = m_ir_builder->CreateFNeg(res_f64); auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); - SetFpr(frd, res_f64); + SetFpr(frd, res_f32); if (rc) { // TODO: Implement this @@ -4194,27 +4186,68 @@ void Compiler::MTFSF(u32 flm, u32 frb, bool rc) { } void Compiler::FCMPU(u32 crfd, u32 fra, u32 frb) { - InterpreterCall("FCMPU", &PPUInterpreter::FCMPU, crfd, fra, frb); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto lt_i1 = m_ir_builder->CreateFCmpOLT(ra_f64, rb_f64); + auto gt_i1 = m_ir_builder->CreateFCmpOGT(ra_f64, rb_f64); + auto eq_i1 = m_ir_builder->CreateFCmpOEQ(ra_f64, rb_f64); + auto cr_i32 = GetCr(); + cr_i32 = SetNibble(cr_i32, crfd, lt_i1, gt_i1, eq_i1, m_ir_builder->getInt1(false)); + SetCr(cr_i32); + + // TODO: Set flags / Handle NaN } void Compiler::FRSP(u32 frd, u32 frb, bool rc) { - InterpreterCall("FRSP", &PPUInterpreter::FRSP, frd, frb, rc); + auto rb_f64 = GetFpr(frb); + auto res_f32 = m_ir_builder->CreateFPTrunc(rb_f64, m_ir_builder->getFloatTy()); + auto res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); + SetFpr(frd, res_f64); + + if (rc) { + // TODO: Implement this + CompilationError("FRSP."); + } + + // TODO: Revisit this + // TODO: Set flags } void Compiler::FCTIW(u32 frd, u32 frb, bool rc) { - auto rb_f64 = GetFpr(frb); + auto rb_f64 = GetFpr(frb); + auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 0x7FFFFFFF)); + auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -2147483648)); auto res_i32 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt32Ty()); - SetFpr(frd, res_i32); + auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); + res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFF), res_i64); + res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x80000000), res_i64); + SetFpr(frd, res_i64); if (rc) { // TODO: Implement this CompilationError("FCTIW."); } - // TODO: Set flags / Handle NaN / Implement Saturation + // TODO: Set flags / Implement rounding modes + //InterpreterCall("FCTIW", &PPUInterpreter::FCTIWZ, frd, frb, rc); } void Compiler::FCTIWZ(u32 frd, u32 frb, bool rc) { + //auto rb_f64 = GetFpr(frb); + //auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 0x7FFFFFFF)); + //auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -2147483648)); + //auto res_i32 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt32Ty()); + //auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); + //res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFF), res_i64); + //res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x80000000), res_i64); + //SetFpr(frd, res_i64); + + //if (rc) { + // // TODO: Implement this + // CompilationError("FCTIWZ."); + //} + + // TODO: Set flags InterpreterCall("FCTIWZ", &PPUInterpreter::FCTIWZ, frd, frb, rc); } @@ -4274,7 +4307,19 @@ void Compiler::FSQRT(u32 frd, u32 frb, bool rc) { } void Compiler::FSEL(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { - InterpreterCall("FSEL", &PPUInterpreter::FSEL, frd, fra, frc, frb, rc); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto rc_f64 = GetFpr(frc); + auto cmp_i1 = m_ir_builder->CreateFCmpOGE(ra_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 0.0)); + auto res_f64 = m_ir_builder->CreateSelect(cmp_i1, rc_f64, rb_f64); + SetFpr(frd, res_f64); + + if (rc) { + // TODO: Implement this + CompilationError("FSEL."); + } + + // TODO: Set flags } void Compiler::FMUL(u32 frd, u32 fra, u32 frc, bool rc) { @@ -4368,7 +4413,16 @@ void Compiler::FNMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { } void Compiler::FCMPO(u32 crfd, u32 fra, u32 frb) { - InterpreterCall("FCMPO", &PPUInterpreter::FCMPO, crfd, fra, frb); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto lt_i1 = m_ir_builder->CreateFCmpOLT(ra_f64, rb_f64); + auto gt_i1 = m_ir_builder->CreateFCmpOGT(ra_f64, rb_f64); + auto eq_i1 = m_ir_builder->CreateFCmpOEQ(ra_f64, rb_f64); + auto cr_i32 = GetCr(); + cr_i32 = SetNibble(cr_i32, crfd, lt_i1, gt_i1, eq_i1, m_ir_builder->getInt1(false)); + SetCr(cr_i32); + + // TODO: Set flags / Handle NaN } void Compiler::FNEG(u32 frd, u32 frb, bool rc) { @@ -4424,7 +4478,11 @@ void Compiler::FABS(u32 frd, u32 frb, bool rc) { void Compiler::FCTID(u32 frd, u32 frb, bool rc) { auto rb_f64 = GetFpr(frb); + auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 0x7FFFFFFFFFFFFFFFll)); + auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -9223372036854775808ll)); auto res_i64 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt64Ty()); + res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFFFFFFFFFF), res_i64); + res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x8000000000000000), res_i64); SetFpr(frd, res_i64); if (rc) { @@ -4432,10 +4490,25 @@ void Compiler::FCTID(u32 frd, u32 frb, bool rc) { CompilationError("FCTID."); } - // TODO: Set flags / Handle NaN / Implement Saturation + // TODO: Set flags / Implement rounding modes + //InterpreterCall("FCTIDZ", &PPUInterpreter::FCTID, frd, frb, rc); } void Compiler::FCTIDZ(u32 frd, u32 frb, bool rc) { + //auto rb_f64 = GetFpr(frb); + //auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 0x7FFFFFFFFFFFFFFFll)); + //auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -9223372036854775808ll)); + //auto res_i64 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt64Ty()); + //res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFFFFFFFFFF), res_i64); + //res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x8000000000000000), res_i64); + //SetFpr(frd, res_i64); + + //if (rc) { + // // TODO: Implement this + // CompilationError("FCTIDZ."); + //} + + // TODO: Set flags InterpreterCall("FCTIDZ", &PPUInterpreter::FCTIDZ, frd, frb, rc); } diff --git a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp index 9acb6ddfc4..855c18e287 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp @@ -164,7 +164,7 @@ VerifyInstructionAgainstInterpreter(fmt::Format("%s.%d", #fn, tc).c_str(), &Comp std::string ret; for (int i = 0; i < 32; i++) { - ret += fmt::Format("GPR[%02d] = 0x%016llx FPR[%02d] = %16g VPR[%02d] = 0x%s [%s]\n", i, GPR[i], i, FPR[i]._double, i, VPR[i].to_hex().c_str(), VPR[i].to_xyzw().c_str()); + ret += fmt::Format("GPR[%02d] = 0x%016llx FPR[%02d] = %16g (0x%016llx) VPR[%02d] = 0x%s [%s]\n", i, GPR[i], i, FPR[i]._double, FPR[i]._u64, i, VPR[i].to_hex().c_str(), VPR[i].to_xyzw().c_str()); } for (int i = 0; i < 8; i++) { @@ -663,17 +663,22 @@ void Compiler::RunAllTests() { VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMSUBS, 0, 5, 0, 1, 2, 3, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMSUBS, 0, 5, 0, 1, 2, 3, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMADDS, 0, 5, 0, 1, 2, 3, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCMPU, 0, 5, 5, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FRSP, 0, 5, 0, 1, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCTIW, 0, 5, 0, 1, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCTIWZ, 0, 5, 0, 1, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FDIV, 0, 5, 0, 1, 2, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSUB, 0, 5, 0, 1, 2, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FADD, 0, 5, 0, 1, 2, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSQRT, 0, 5, 0, 1, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSEL, 0, 5, 0, 1, 2, 3, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMUL, 0, 5, 0, 1, 2, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FRSQRTE, 0, 5, 0, 1, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMSUB, 0, 5, 0, 1, 2, 3, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMADD, 0, 5, 0, 1, 2, 3, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMSUB, 0, 5, 0, 1, 2, 3, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMADD, 0, 5, 0, 1, 2, 3, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCMPO, 0, 5, 3, 0, 1); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNEG, 0, 5, 0, 1, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMR, 0, 5, 0, 1, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNABS, 0, 5, 0, 1, false); From 205e1d88b3a8b7ae1b941ef03d164bdbbaf0d0e6 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Wed, 26 Nov 2014 00:53:24 +0530 Subject: [PATCH 10/13] PPU LLVM recompiler: Implement more instructions --- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 92 ++++++++++++++--------- rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp | 4 + 2 files changed, 62 insertions(+), 34 deletions(-) diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 55c406177d..85e75b9a03 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -1778,7 +1778,21 @@ void Compiler::BC(u32 bo, u32 bi, s32 bd, u32 aa, u32 lk) { } void Compiler::SC(u32 sc_code) { - InterpreterCall("SC", &PPUInterpreter::SC, sc_code); + switch (sc_code) { + case 2: + Call("SysCalls.DoSyscall", SysCalls::DoSyscall, m_state.args[CompileTaskState::Args::State], GetGpr(11)); + break; + case 3: + Call("StaticFuncManager.StaticExecute", &StaticFuncManager::StaticExecute, + m_ir_builder->getInt64((u64)&Emu.GetSFuncManager()), m_state.args[CompileTaskState::Args::State], GetGpr(11, 32)); + break; + case 4: + Call("PPUThread.FastStop", &PPUThread::FastStop, m_state.args[CompileTaskState::Args::State]); + break; + default: + CompilationError(fmt::Format("SC %u", sc_code)); + break; + } } void Compiler::B(s32 ll, u32 aa, u32 lk) { @@ -2955,7 +2969,15 @@ void Compiler::EQV(u32 ra, u32 rs, u32 rb, bool rc) { } void Compiler::ECIWX(u32 rd, u32 ra, u32 rb) { - InterpreterCall("ECIWX", &PPUInterpreter::ECIWX, rd, ra, rb); + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto mem_i32 = ReadMemory(addr_i64, 32); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); } void Compiler::LHZUX(u32 rd, u32 ra, u32 rb) { @@ -3106,7 +3128,13 @@ void Compiler::ORC(u32 ra, u32 rs, u32 rb, bool rc) { } void Compiler::ECOWX(u32 rs, u32 ra, u32 rb) { - InterpreterCall("ECOWX", &PPUInterpreter::ECOWX, rs, ra, rb); + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + WriteMemory(addr_i64, GetGpr(rs, 32)); } void Compiler::STHUX(u32 rs, u32 ra, u32 rb) { @@ -4215,8 +4243,8 @@ void Compiler::FRSP(u32 frd, u32 frb, bool rc) { void Compiler::FCTIW(u32 frd, u32 frb, bool rc) { auto rb_f64 = GetFpr(frb); - auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 0x7FFFFFFF)); - auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -2147483648)); + auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 2147483647.0)); + auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -2147483648.0)); auto res_i32 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt32Ty()); auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFF), res_i64); @@ -4229,26 +4257,24 @@ void Compiler::FCTIW(u32 frd, u32 frb, bool rc) { } // TODO: Set flags / Implement rounding modes - //InterpreterCall("FCTIW", &PPUInterpreter::FCTIWZ, frd, frb, rc); } void Compiler::FCTIWZ(u32 frd, u32 frb, bool rc) { - //auto rb_f64 = GetFpr(frb); - //auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 0x7FFFFFFF)); - //auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -2147483648)); - //auto res_i32 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt32Ty()); - //auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); - //res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFF), res_i64); - //res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x80000000), res_i64); - //SetFpr(frd, res_i64); + auto rb_f64 = GetFpr(frb); + auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 2147483647.0)); + auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -2147483648.0)); + auto res_i32 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt32Ty()); + auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); + res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFF), res_i64); + res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x80000000), res_i64); + SetFpr(frd, res_i64); - //if (rc) { - // // TODO: Implement this - // CompilationError("FCTIWZ."); - //} + if (rc) { + // TODO: Implement this + CompilationError("FCTIWZ."); + } // TODO: Set flags - InterpreterCall("FCTIWZ", &PPUInterpreter::FCTIWZ, frd, frb, rc); } void Compiler::FDIV(u32 frd, u32 fra, u32 frb, bool rc) { @@ -4478,8 +4504,8 @@ void Compiler::FABS(u32 frd, u32 frb, bool rc) { void Compiler::FCTID(u32 frd, u32 frb, bool rc) { auto rb_f64 = GetFpr(frb); - auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 0x7FFFFFFFFFFFFFFFll)); - auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -9223372036854775808ll)); + auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 9223372036854775807.0)); + auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -9223372036854775808.0)); auto res_i64 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt64Ty()); res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFFFFFFFFFF), res_i64); res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x8000000000000000), res_i64); @@ -4491,25 +4517,23 @@ void Compiler::FCTID(u32 frd, u32 frb, bool rc) { } // TODO: Set flags / Implement rounding modes - //InterpreterCall("FCTIDZ", &PPUInterpreter::FCTID, frd, frb, rc); } void Compiler::FCTIDZ(u32 frd, u32 frb, bool rc) { - //auto rb_f64 = GetFpr(frb); - //auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 0x7FFFFFFFFFFFFFFFll)); - //auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -9223372036854775808ll)); - //auto res_i64 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt64Ty()); - //res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFFFFFFFFFF), res_i64); - //res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x8000000000000000), res_i64); - //SetFpr(frd, res_i64); + auto rb_f64 = GetFpr(frb); + auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 9223372036854775807.0)); + auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -9223372036854775808.0)); + auto res_i64 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt64Ty()); + res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFFFFFFFFFF), res_i64); + res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x8000000000000000), res_i64); + SetFpr(frd, res_i64); - //if (rc) { - // // TODO: Implement this - // CompilationError("FCTIDZ."); - //} + if (rc) { + // TODO: Implement this + CompilationError("FCTIDZ."); + } // TODO: Set flags - InterpreterCall("FCTIDZ", &PPUInterpreter::FCTIDZ, frd, frb, rc); } void Compiler::FCFID(u32 frd, u32 frb, bool rc) { diff --git a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp index 855c18e287..b7638eeeca 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp @@ -703,6 +703,8 @@ void Compiler::RunAllTests() { VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZU, 0, input, 5, 14, 0x10000); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZX, 0, input, 5, 0, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZX, 1, input, 5, 14, 23); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(ECIWX, 0, input, 5, 0, 23); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(ECIWX, 1, input, 5, 14, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZUX, 0, input, 5, 14, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHA, 0, input, 5, 0, 0x100F0); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHA, 1, input, 5, 14, 0x100F0); @@ -789,6 +791,8 @@ void Compiler::RunAllTests() { VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHU, 0, input, 3, 14, 0x10000); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHX, 0, input, 3, 0, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHX, 1, input, 3, 14, 23); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(ECOWX, 0, input, 3, 0, 23); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(ECOWX, 1, input, 3, 14, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHUX, 0, input, 3, 14, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHBRX, 0, input, 3, 14, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STW, 0, input, 3, 0, 0x10000); From 6ea50567b67903a1e097ac2b68c112e1d28cea99 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sat, 29 Nov 2014 01:39:59 +0530 Subject: [PATCH 11/13] PPU: Implemented some instructions in the recompiler. Fixed some bugs in the interpreter. --- rpcs3/Emu/Cell/PPUInterpreter.h | 172 +++++++++++++--------- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 137 +++++++++++++++-- rpcs3/Emu/Cell/PPULLVMRecompiler.h | 6 +- rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp | 11 +- 4 files changed, 241 insertions(+), 85 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index f96d634f23..76a9991a62 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -834,11 +834,11 @@ private: } void VCTSXS(u32 vd, u32 uimm5, u32 vb) { - int nScale = 1 << uimm5; - + u32 nScale = 1 << uimm5; + for (uint w = 0; w < 4; w++) - { - float result = CPU.VPR[vb]._f[w] * nScale; + { + double result = (double)CPU.VPR[vb]._f[w] * nScale; if (result > 0x7fffffff) { @@ -856,12 +856,12 @@ private: } void VCTUXS(u32 vd, u32 uimm5, u32 vb) { - int nScale = 1 << uimm5; + u32 nScale = 1 << uimm5; for (uint w = 0; w < 4; w++) { // C rounding = Round towards 0 - float result = CPU.VPR[vb]._f[w] * nScale; + double result = (double)CPU.VPR[vb]._f[w] * nScale; if (result > 0xffffffffu) { @@ -1078,26 +1078,32 @@ private: } void VMRGLB(u32 vd, u32 va, u32 vb) { + u128 VA = CPU.VPR[va]; + u128 VB = CPU.VPR[vb]; for (uint h = 0; h < 8; h++) { - CPU.VPR[vd]._u8[15 - h*2] = CPU.VPR[va]._u8[7 - h]; - CPU.VPR[vd]._u8[15 - h*2 - 1] = CPU.VPR[vb]._u8[7 - h]; + CPU.VPR[vd]._u8[15 - h*2] = VA._u8[7 - h]; + CPU.VPR[vd]._u8[15 - h*2 - 1] = VB._u8[7 - h]; } } void VMRGLH(u32 vd, u32 va, u32 vb) { + u128 VA = CPU.VPR[va]; + u128 VB = CPU.VPR[vb]; for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._u16[7 - w*2] = CPU.VPR[va]._u16[3 - w]; - CPU.VPR[vd]._u16[7 - w*2 - 1] = CPU.VPR[vb]._u16[3 - w]; + CPU.VPR[vd]._u16[7 - w*2] = VA._u16[3 - w]; + CPU.VPR[vd]._u16[7 - w*2 - 1] = VB._u16[3 - w]; } } void VMRGLW(u32 vd, u32 va, u32 vb) { + u128 VA = CPU.VPR[va]; + u128 VB = CPU.VPR[vb]; for (uint d = 0; d < 2; d++) { - CPU.VPR[vd]._u32[3 - d*2] = CPU.VPR[va]._u32[1 - d]; - CPU.VPR[vd]._u32[3 - d*2 - 1] = CPU.VPR[vb]._u32[1 - d]; + CPU.VPR[vd]._u32[3 - d*2] = VA._u32[1 - d]; + CPU.VPR[vd]._u32[3 - d*2 - 1] = VB._u32[1 - d]; } } void VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) //nf @@ -1168,7 +1174,7 @@ private: for (uint b = 0; b < 4; b++) { - result += CPU.VPR[va]._u8[w*4 + b] * CPU.VPR[vb]._u8[w*4 + b]; + result += (u32)CPU.VPR[va]._u8[w*4 + b] * (u32)CPU.VPR[vb]._u8[w*4 + b]; } result += CPU.VPR[vc]._u32[w]; @@ -1183,7 +1189,7 @@ private: for (uint h = 0; h < 2; h++) { - result += CPU.VPR[va]._u16[w*2 + h] * CPU.VPR[vb]._u16[w*2 + h]; + result += (u32)CPU.VPR[va]._u16[w*2 + h] * (u32)CPU.VPR[vb]._u16[w*2 + h]; } result += CPU.VPR[vc]._u32[w]; @@ -1199,7 +1205,7 @@ private: for (uint h = 0; h < 2; h++) { - result += CPU.VPR[va]._u16[w*2 + h] * CPU.VPR[vb]._u16[w*2 + h]; + result += (u64)CPU.VPR[va]._u16[w*2 + h] * (u64)CPU.VPR[vb]._u16[w*2 + h]; } result += CPU.VPR[vc]._u32[w]; @@ -1307,16 +1313,18 @@ private: } void VPKPX(u32 vd, u32 va, u32 vb) { + u128 VA = CPU.VPR[va]; + u128 VB = CPU.VPR[vb]; for (uint h = 0; h < 4; h++) { - u16 bb7 = CPU.VPR[vb]._u8[15 - (h*4 + 0)] & 0x1; - u16 bb8 = CPU.VPR[vb]._u8[15 - (h*4 + 1)] >> 3; - u16 bb16 = CPU.VPR[vb]._u8[15 - (h*4 + 2)] >> 3; - u16 bb24 = CPU.VPR[vb]._u8[15 - (h*4 + 3)] >> 3; - u16 ab7 = CPU.VPR[va]._u8[15 - (h*4 + 0)] & 0x1; - u16 ab8 = CPU.VPR[va]._u8[15 - (h*4 + 1)] >> 3; - u16 ab16 = CPU.VPR[va]._u8[15 - (h*4 + 2)] >> 3; - u16 ab24 = CPU.VPR[va]._u8[15 - (h*4 + 3)] >> 3; + u16 bb7 = VB._u8[15 - (h*4 + 0)] & 0x1; + u16 bb8 = VB._u8[15 - (h*4 + 1)] >> 3; + u16 bb16 = VB._u8[15 - (h*4 + 2)] >> 3; + u16 bb24 = VB._u8[15 - (h*4 + 3)] >> 3; + u16 ab7 = VA._u8[15 - (h*4 + 0)] & 0x1; + u16 ab8 = VA._u8[15 - (h*4 + 1)] >> 3; + u16 ab16 = VA._u8[15 - (h*4 + 2)] >> 3; + u16 ab24 = VA._u8[15 - (h*4 + 3)] >> 3; CPU.VPR[vd]._u16[3 - h] = (bb7 << 15) | (bb8 << 10) | (bb16 << 5) | bb24; CPU.VPR[vd]._u16[4 + (3 - h)] = (ab7 << 15) | (ab8 << 10) | (ab16 << 5) | ab24; @@ -1324,9 +1332,11 @@ private: } void VPKSHSS(u32 vd, u32 va, u32 vb) //nf { + u128 VA = CPU.VPR[va]; + u128 VB = CPU.VPR[vb]; for (uint b = 0; b < 8; b++) { - s16 result = CPU.VPR[va]._s16[b]; + s16 result = VA._s16[b]; if (result > INT8_MAX) { @@ -1341,7 +1351,7 @@ private: CPU.VPR[vd]._s8[b+8] = (s8)result; - result = CPU.VPR[vb]._s16[b]; + result = VB._s16[b]; if (result > INT8_MAX) { @@ -1359,9 +1369,11 @@ private: } void VPKSHUS(u32 vd, u32 va, u32 vb) { + u128 VA = CPU.VPR[va]; + u128 VB = CPU.VPR[vb]; for (uint b = 0; b < 8; b++) { - s16 result = CPU.VPR[va]._s16[b]; + s16 result = VA._s16[b]; if (result > UINT8_MAX) { @@ -1376,7 +1388,7 @@ private: CPU.VPR[vd]._u8[b+8] = (u8)result; - result = CPU.VPR[vb]._s16[b]; + result = VB._s16[b]; if (result > UINT8_MAX) { @@ -1394,9 +1406,11 @@ private: } void VPKSWSS(u32 vd, u32 va, u32 vb) { + u128 VA = CPU.VPR[va]; + u128 VB = CPU.VPR[vb]; for (uint h = 0; h < 4; h++) { - s32 result = CPU.VPR[va]._s32[h]; + s32 result = VA._s32[h]; if (result > INT16_MAX) { @@ -1411,7 +1425,7 @@ private: CPU.VPR[vd]._s16[h+4] = result; - result = CPU.VPR[vb]._s32[h]; + result = VB._s32[h]; if (result > INT16_MAX) { @@ -1429,9 +1443,11 @@ private: } void VPKSWUS(u32 vd, u32 va, u32 vb) //nf { + u128 VA = CPU.VPR[va]; + u128 VB = CPU.VPR[vb]; for (uint h = 0; h < 4; h++) { - s32 result = CPU.VPR[va]._s32[h]; + s32 result = VA._s32[h]; if (result > UINT16_MAX) { @@ -1446,7 +1462,7 @@ private: CPU.VPR[vd]._u16[h+4] = result; - result = CPU.VPR[vb]._s32[h]; + result = VB._s32[h]; if (result > UINT16_MAX) { @@ -1464,17 +1480,21 @@ private: } void VPKUHUM(u32 vd, u32 va, u32 vb) //nf { + u128 VA = CPU.VPR[va]; + u128 VB = CPU.VPR[vb]; for (uint b = 0; b < 8; b++) { - CPU.VPR[vd]._u8[b+8] = CPU.VPR[va]._u8[b*2]; - CPU.VPR[vd]._u8[b ] = CPU.VPR[vb]._u8[b*2]; + CPU.VPR[vd]._u8[b+8] = VA._u8[b*2]; + CPU.VPR[vd]._u8[b ] = VB._u8[b*2]; } } void VPKUHUS(u32 vd, u32 va, u32 vb) { + u128 VA = CPU.VPR[va]; + u128 VB = CPU.VPR[vb]; for (uint b = 0; b < 8; b++) { - u16 result = CPU.VPR[va]._u16[b]; + u16 result = VA._u16[b]; if (result > UINT8_MAX) { @@ -1484,7 +1504,7 @@ private: CPU.VPR[vd]._u8[b+8] = (u8)result; - result = CPU.VPR[vb]._u16[b]; + result = VB._u16[b]; if (result > UINT8_MAX) { @@ -1497,17 +1517,21 @@ private: } void VPKUWUM(u32 vd, u32 va, u32 vb) { + u128 VA = CPU.VPR[va]; + u128 VB = CPU.VPR[vb]; for (uint h = 0; h < 4; h++) { - CPU.VPR[vd]._u16[h+4] = CPU.VPR[va]._u16[h*2]; - CPU.VPR[vd]._u16[h ] = CPU.VPR[vb]._u16[h*2]; + CPU.VPR[vd]._u16[h+4] = VA._u16[h*2]; + CPU.VPR[vd]._u16[h ] = VB._u16[h*2]; } } void VPKUWUS(u32 vd, u32 va, u32 vb) //nf { + u128 VA = CPU.VPR[va]; + u128 VB = CPU.VPR[vb]; for (uint h = 0; h < 4; h++) { - u32 result = CPU.VPR[va]._u32[h]; + u32 result = VA._u32[h]; if (result > UINT16_MAX) { @@ -1517,7 +1541,7 @@ private: CPU.VPR[vd]._u16[h+4] = result; - result = CPU.VPR[vb]._u32[h]; + result = VB._u32[h]; if (result > UINT16_MAX) { @@ -1539,30 +1563,28 @@ private: { for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._f[w] = floor(CPU.VPR[vb]._f[w]); + CPU.VPR[vd]._f[w] = floorf(CPU.VPR[vb]._f[w]); } } void VRFIN(u32 vd, u32 vb) { for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._f[w] = floor(CPU.VPR[vb]._f[w] + 0.5f); + CPU.VPR[vd]._f[w] = nearbyintf(CPU.VPR[vb]._f[w]); } } void VRFIP(u32 vd, u32 vb) { for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._f[w] = ceil(CPU.VPR[vb]._f[w]); + CPU.VPR[vd]._f[w] = ceilf(CPU.VPR[vb]._f[w]); } } void VRFIZ(u32 vd, u32 vb) { for (uint w = 0; w < 4; w++) { - float f; - modff(CPU.VPR[vb]._f[w], &f); - CPU.VPR[vd]._f[w] = f; + CPU.VPR[vd]._f[w] = truncf(CPU.VPR[vb]._f[w]); } } void VRLB(u32 vd, u32 va, u32 vb) //nf @@ -1605,12 +1627,13 @@ private: } void VSL(u32 vd, u32 va, u32 vb) //nf { + u128 VA = CPU.VPR[va]; u8 sh = CPU.VPR[vb]._u8[0] & 0x7; - CPU.VPR[vd]._u8[0] = CPU.VPR[va]._u8[0] << sh; + CPU.VPR[vd]._u8[0] = VA._u8[0] << sh; for (uint b = 1; b < 16; b++) { - CPU.VPR[vd]._u8[b] = (CPU.VPR[va]._u8[b] << sh) | (CPU.VPR[va]._u8[b-1] >> (8 - sh)); + CPU.VPR[vd]._u8[b] = (VA._u8[b] << sh) | (VA._u8[b-1] >> (8 - sh)); } } void VSLB(u32 vd, u32 va, u32 vb) @@ -1635,18 +1658,19 @@ private: { for (uint h = 0; h < 8; h++) { - CPU.VPR[vd]._u16[h] = CPU.VPR[va]._u16[h] << (CPU.VPR[vb]._u8[h*2] & 0xf); + CPU.VPR[vd]._u16[h] = CPU.VPR[va]._u16[h] << (CPU.VPR[vb]._u16[h] & 0xf); } } void VSLO(u32 vd, u32 va, u32 vb) { + u128 VA = CPU.VPR[va]; u8 nShift = (CPU.VPR[vb]._u8[0] >> 3) & 0xf; CPU.VPR[vd].clear(); for (u8 b = 0; b < 16 - nShift; b++) { - CPU.VPR[vd]._u8[15 - b] = CPU.VPR[va]._u8[15 - (b + nShift)]; + CPU.VPR[vd]._u8[15 - b] = VA._u8[15 - (b + nShift)]; } } void VSLW(u32 vd, u32 va, u32 vb) @@ -1710,12 +1734,13 @@ private: } void VSR(u32 vd, u32 va, u32 vb) //nf { + u128 VA = CPU.VPR[va]; u8 sh = CPU.VPR[vb]._u8[0] & 0x7; - CPU.VPR[vd]._u8[15] = CPU.VPR[va]._u8[15] >> sh; + CPU.VPR[vd]._u8[15] = VA._u8[15] >> sh; for (uint b = 14; ~b; b--) { - CPU.VPR[vd]._u8[b] = (CPU.VPR[va]._u8[b] >> sh) | (CPU.VPR[va]._u8[b+1] << (8 - sh)); + CPU.VPR[vd]._u8[b] = (VA._u8[b] >> sh) | (VA._u8[b+1] << (8 - sh)); } } void VSRAB(u32 vd, u32 va, u32 vb) //nf @@ -1729,14 +1754,14 @@ private: { for (uint h = 0; h < 8; h++) { - CPU.VPR[vd]._s16[h] = CPU.VPR[va]._s16[h] >> (CPU.VPR[vb]._u8[h*2] & 0xf); + CPU.VPR[vd]._s16[h] = CPU.VPR[va]._s16[h] >> (CPU.VPR[vb]._u16[h] & 0xf); } } void VSRAW(u32 vd, u32 va, u32 vb) { for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._s32[w] = CPU.VPR[va]._s32[w] >> (CPU.VPR[vb]._u8[w*4] & 0x1f); + CPU.VPR[vd]._s32[w] = CPU.VPR[va]._s32[w] >> (CPU.VPR[vb]._u32[w] & 0x1f); } } void VSRB(u32 vd, u32 va, u32 vb) @@ -1750,25 +1775,26 @@ private: { for (uint h = 0; h < 8; h++) { - CPU.VPR[vd]._u16[h] = CPU.VPR[va]._u16[h] >> (CPU.VPR[vb]._u8[h*2] & 0xf); + CPU.VPR[vd]._u16[h] = CPU.VPR[va]._u16[h] >> (CPU.VPR[vb]._u16[h] & 0xf); } } void VSRO(u32 vd, u32 va, u32 vb) { + u128 VA = CPU.VPR[va]; u8 nShift = (CPU.VPR[vb]._u8[0] >> 3) & 0xf; CPU.VPR[vd].clear(); for (u8 b = 0; b < 16 - nShift; b++) { - CPU.VPR[vd]._u8[b] = CPU.VPR[va]._u8[b + nShift]; + CPU.VPR[vd]._u8[b] = VA._u8[b + nShift]; } } void VSRW(u32 vd, u32 va, u32 vb) { for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._u32[w] = CPU.VPR[va]._u32[w] >> (CPU.VPR[vb]._u8[w*4] & 0x1f); + CPU.VPR[vd]._u32[w] = CPU.VPR[va]._u32[w] >> (CPU.VPR[vb]._u32[w] & 0x1f); } } void VSUBCUW(u32 vd, u32 va, u32 vb) //nf @@ -2029,50 +2055,56 @@ private: } void VUPKHPX(u32 vd, u32 vb) { + u128 VB = CPU.VPR[vb]; for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._s8[(3 - w)*4 + 3] = CPU.VPR[vb]._s8[w*2 + 0] >> 7; // signed shift sign extends - CPU.VPR[vd]._u8[(3 - w)*4 + 2] = (CPU.VPR[vb]._u8[w*2 + 0] >> 2) & 0x1f; - CPU.VPR[vd]._u8[(3 - w)*4 + 1] = ((CPU.VPR[vb]._u8[w*2 + 0] & 0x3) << 3) | ((CPU.VPR[vb]._u8[w*2 + 1] >> 5) & 0x7); - CPU.VPR[vd]._u8[(3 - w)*4 + 0] = CPU.VPR[vb]._u8[w*2 + 1] & 0x1f; + CPU.VPR[vd]._s8[w*4 + 3] = VB._s8[8 + w*2 + 1] >> 7; // signed shift sign extends + CPU.VPR[vd]._u8[w*4 + 2] = (VB._u8[8 + w*2 + 1] >> 2) & 0x1f; + CPU.VPR[vd]._u8[w*4 + 1] = ((VB._u8[8 + w*2 + 1] & 0x3) << 3) | ((VB._u8[8 + w*2 + 0] >> 5) & 0x7); + CPU.VPR[vd]._u8[w*4 + 0] = VB._u8[8 + w*2 + 0] & 0x1f; } } void VUPKHSB(u32 vd, u32 vb) { + u128 VB = CPU.VPR[vb]; for (uint h = 0; h < 8; h++) { - CPU.VPR[vd]._s16[h] = CPU.VPR[vb]._s8[h]; + CPU.VPR[vd]._s16[h] = VB._s8[8 + h]; } } void VUPKHSH(u32 vd, u32 vb) { + u128 VB = CPU.VPR[vb]; for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._s32[w] = CPU.VPR[vb]._s16[w]; + CPU.VPR[vd]._s32[w] = VB._s16[4 + w]; } } void VUPKLPX(u32 vd, u32 vb) { + u128 VB = CPU.VPR[vb]; for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._s8[(3 - w)*4 + 3] = CPU.VPR[vb]._s8[8 + w*2 + 0] >> 7; // signed shift sign extends - CPU.VPR[vd]._u8[(3 - w)*4 + 2] = (CPU.VPR[vb]._u8[8 + w*2 + 0] >> 2) & 0x1f; - CPU.VPR[vd]._u8[(3 - w)*4 + 1] = ((CPU.VPR[vb]._u8[8 + w*2 + 0] & 0x3) << 3) | ((CPU.VPR[vb]._u8[8 + w*2 + 1] >> 5) & 0x7); - CPU.VPR[vd]._u8[(3 - w)*4 + 0] = CPU.VPR[vb]._u8[8 + w*2 + 1] & 0x1f; + CPU.VPR[vd]._s8[w*4 + 3] = VB._s8[w*2 + 1] >> 7; // signed shift sign extends + CPU.VPR[vd]._u8[w*4 + 2] = (VB._u8[w*2 + 1] >> 2) & 0x1f; + CPU.VPR[vd]._u8[w*4 + 1] = ((VB._u8[w*2 + 1] & 0x3) << 3) | ((VB._u8[w*2 + 0] >> 5) & 0x7); + CPU.VPR[vd]._u8[w*4 + 0] = VB._u8[w*2 + 0] & 0x1f; } } void VUPKLSB(u32 vd, u32 vb) //nf { + u128 VB = CPU.VPR[vb]; for (uint h = 0; h < 8; h++) { - CPU.VPR[vd]._s16[h] = CPU.VPR[vb]._s8[8 + h]; + CPU.VPR[vd]._s16[h] = VB._s8[h]; } } void VUPKLSH(u32 vd, u32 vb) { + u128 VB = CPU.VPR[vb]; for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._s32[w] = CPU.VPR[vb]._s16[4 + w]; + CPU.VPR[vd]._s32[w] = VB._s16[w]; } } void VXOR(u32 vd, u32 va, u32 vb) @@ -2792,7 +2824,7 @@ private: return; } const u8 eb = (addr & 0xf) >> 1; - vm::write16((u32)addr, CPU.VPR[vs]._u16[7 - eb]); + vm::write16((u32)addr & 0xFFFFFFFE, CPU.VPR[vs]._u16[7 - eb]); } void STDUX(u32 rs, u32 ra, u32 rb) { @@ -2828,7 +2860,7 @@ private: return; } const u8 eb = (addr & 0xf) >> 2; - vm::write32((u32)addr, CPU.VPR[vs]._u32[3 - eb]); + vm::write32((u32)addr & 0xFFFFFFFC, CPU.VPR[vs]._u32[3 - eb]); } void ADDZE(u32 rd, u32 ra, u32 oe, bool rc) { diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 85e75b9a03..b88489bc5e 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -1016,7 +1016,26 @@ void Compiler::VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) { } void Compiler::VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) { - InterpreterCall("VMSUMSHS", &PPUInterpreter::VMSUMSHS, vd, va, vb, vc); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto vc_v4i32 = GetVrAsIntVec(vc, 32); + auto res_v4i32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmadd_wd), va_v8i16, vb_v8i16); + + auto tmp1_v4i32 = m_ir_builder->CreateLShr(vc_v4i32, 31); + tmp1_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x7FFFFFFF))); + auto tmp1_v16i8 = m_ir_builder->CreateBitCast(tmp1_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + auto tmp2_v4i32 = m_ir_builder->CreateXor(vc_v4i32, res_v4i32); + tmp2_v4i32 = m_ir_builder->CreateNot(tmp2_v4i32); + auto sum_v4i32 = m_ir_builder->CreateAdd(vc_v4i32, res_v4i32); + auto sum_v16i8 = m_ir_builder->CreateBitCast(sum_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + auto tmp3_v4i32 = m_ir_builder->CreateXor(vc_v4i32, sum_v4i32); + tmp3_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, tmp3_v4i32); + tmp3_v4i32 = m_ir_builder->CreateAShr(tmp3_v4i32, 31); + auto tmp3_v16i8 = m_ir_builder->CreateBitCast(tmp3_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + auto res_v16i8 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pblendvb), sum_v16i8, tmp1_v16i8, tmp3_v16i8); + SetVr(vd, res_v16i8); + + // TODO: Set VSCR.SAT } void Compiler::VMSUMUBM(u32 vd, u32 va, u32 vb, u32 vc) { @@ -1074,7 +1093,31 @@ void Compiler::VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) { } void Compiler::VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) { - InterpreterCall("VMSUMUHS", &PPUInterpreter::VMSUMUHS, vd, va, vb, vc); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto va_v8i32 = m_ir_builder->CreateZExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto vb_v8i32 = m_ir_builder->CreateZExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto tmp_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32); + + auto undef_v8i32 = UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)); + u32 mask1_v4i32[4] = {0, 2, 4, 6}; + auto tmp1_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + u32 mask2_v4i32[4] = {1, 3, 5, 7}; + auto tmp2_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + + auto vc_v4i32 = GetVrAsIntVec(vc, 32); + auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32); + auto cmp_v4i1 = m_ir_builder->CreateICmpULT(res_v4i32, tmp1_v4i32); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + res_v4i32 = m_ir_builder->CreateOr(res_v4i32, cmp_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32); + cmp_v4i1 = m_ir_builder->CreateICmpULT(res_v4i32, vc_v4i32); + cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + res_v4i32 = m_ir_builder->CreateOr(res_v4i32, cmp_v4i32); + + SetVr(vd, res_v4i32); + + // TODO: Set VSCR.SAT } void Compiler::VMULESB(u32 vd, u32 va, u32 vb) { @@ -1204,7 +1247,37 @@ void Compiler::VPERM(u32 vd, u32 va, u32 vb, u32 vc) { } void Compiler::VPKPX(u32 vd, u32 va, u32 vb) { - InterpreterCall("VPKPX", &PPUInterpreter::VPKPX, vd, va, vb); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + + auto tmpa_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(7))); + tmpa_v4i32 = m_ir_builder->CreateAnd(tmpa_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFC000000))); + va_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); + va_v4i32 = m_ir_builder->CreateAnd(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFC000000))); + tmpa_v4i32 = m_ir_builder->CreateOr(tmpa_v4i32, va_v4i32); + tmpa_v4i32 = m_ir_builder->CreateAnd(tmpa_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFE00000))); + va_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); + va_v4i32 = m_ir_builder->CreateAnd(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFFE00000))); + tmpa_v4i32 = m_ir_builder->CreateOr(tmpa_v4i32, va_v4i32); + auto tmpa_v8i16 = m_ir_builder->CreateBitCast(tmpa_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + + auto tmpb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(7))); + tmpb_v4i32 = m_ir_builder->CreateAnd(tmpb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFC000000))); + vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); + vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFC000000))); + tmpb_v4i32 = m_ir_builder->CreateOr(tmpb_v4i32, vb_v4i32); + tmpb_v4i32 = m_ir_builder->CreateAnd(tmpb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFE00000))); + vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); + vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFFE00000))); + tmpb_v4i32 = m_ir_builder->CreateOr(tmpb_v4i32, vb_v4i32); + auto tmpb_v8i16 = m_ir_builder->CreateBitCast(tmpb_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + + u32 mask_v8i32[8] = {1, 3, 5, 7, 9, 11, 13, 15}; + auto res_v8i16 = m_ir_builder->CreateShuffleVector(tmpb_v8i16, tmpa_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + + SetVr(vd, res_v8i16); + + // TODO: Implement with pext on CPUs with BMI } void Compiler::VPKSHSS(u32 vd, u32 va, u32 vb) { @@ -1669,27 +1742,69 @@ void Compiler::VSUM4UBS(u32 vd, u32 va, u32 vb) { } void Compiler::VUPKHPX(u32 vd, u32 vb) { - InterpreterCall("VUPKHPX", &PPUInterpreter::VUPKHPX, vd, vb); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + u32 mask_v8i32[8] = { 4, 4, 5, 5, 6, 6, 7, 7 }; + vb_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + + auto vb_v4i32 = m_ir_builder->CreateBitCast(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); + auto tmp1_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); + tmp1_v4i32 = m_ir_builder->CreateAnd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x00001F00))); + auto tmp2_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(6))); + tmp2_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x0000001F))); + auto res_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFF1F0000))); + res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp1_v4i32); + res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp2_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VUPKHSB(u32 vd, u32 vb) { - InterpreterCall("VUPKHSB", &PPUInterpreter::VUPKHSB, vd, vb); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + u32 mask_v8i32[8] = { 8, 9, 10, 11, 12, 13, 14, 15 }; + auto vb_v8i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + auto res_v8i16 = m_ir_builder->CreateSExt(vb_v8i8, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + SetVr(vd, res_v8i16); } void Compiler::VUPKHSH(u32 vd, u32 vb) { - InterpreterCall("VUPKHSH", &PPUInterpreter::VUPKHSH, vd, vb); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + u32 mask_v4i32[4] = { 4, 5, 6, 7 }; + auto vb_v4i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32)); + auto res_v4i32 = m_ir_builder->CreateSExt(vb_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, res_v4i32); } void Compiler::VUPKLPX(u32 vd, u32 vb) { - InterpreterCall("VUPKLPX", &PPUInterpreter::VUPKLPX, vd, vb); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + u32 mask_v8i32[8] = { 0, 0, 1, 1, 2, 2, 3, 3 }; + vb_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + + auto vb_v4i32 = m_ir_builder->CreateBitCast(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); + auto tmp1_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); + tmp1_v4i32 = m_ir_builder->CreateAnd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x00001F00))); + auto tmp2_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(6))); + tmp2_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x0000001F))); + auto res_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFF1F0000))); + res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp1_v4i32); + res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp2_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VUPKLSB(u32 vd, u32 vb) { - InterpreterCall("VUPKLSB", &PPUInterpreter::VUPKLSB, vd, vb); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + u32 mask_v8i32[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + auto vb_v8i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + auto res_v8i16 = m_ir_builder->CreateSExt(vb_v8i8, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + SetVr(vd, res_v8i16); } void Compiler::VUPKLSH(u32 vd, u32 vb) { - InterpreterCall("VUPKLSH", &PPUInterpreter::VUPKLSH, vd, vb); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + u32 mask_v4i32[4] = { 0, 1, 2, 3 }; + auto vb_v4i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32)); + auto res_v4i32 = m_ir_builder->CreateSExt(vb_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, res_v4i32); } void Compiler::VXOR(u32 vd, u32 va, u32 vb) { @@ -5250,9 +5365,9 @@ std::shared_ptr RecompilationEngine::s_the_instance = nullp RecompilationEngine::RecompilationEngine() : ThreadBase("PPU Recompilation Engine") + , m_log(nullptr) , m_next_ordinal(0) - , m_compiler(*this, ExecutionEngine::ExecuteFunction, ExecutionEngine::ExecuteTillReturn) - , m_log(nullptr) { + , m_compiler(*this, ExecutionEngine::ExecuteFunction, ExecutionEngine::ExecuteTillReturn) { m_compiler.RunAllTests(); } diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h index cc52a24858..8249da1c97 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.h +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h @@ -1022,6 +1022,9 @@ namespace ppu_recompiler_llvm { }; }; + /// Log + llvm::raw_fd_ostream * m_log; + /// Lock for accessing m_pending_execution_traces. TODO: Eliminate this and use a lock-free queue. std::mutex m_pending_execution_traces_lock; @@ -1047,9 +1050,6 @@ namespace ppu_recompiler_llvm { /// PPU Compiler Compiler m_compiler; - /// Log - llvm::raw_fd_ostream * m_log; - /// Executable lookup table Executable m_executable_lookup[10000]; // TODO: Adjust size diff --git a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp index b7638eeeca..f4c3319428 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp @@ -432,9 +432,10 @@ void Compiler::RunAllTests() { VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGLW, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMMBM, 0, 5, 0, 1, 2, 3); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMSHM, 0, 5, 0, 1, 2, 3); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMSHS, 0, 5, 0, 1, 2, 3); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUBM, 0, 5, 0, 1, 2, 3); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUHM, 0, 5, 0, 1, 2, 3); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VNMSUBFP, 0, 5, 0, 1, 2, 3); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUHS, 0, 5, 0, 1, 2, 3); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULESB, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULESH, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULEUB, 0, 5, 0, 1, 2); @@ -443,9 +444,11 @@ void Compiler::RunAllTests() { VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOSH, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOUB, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOUH, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VNMSUBFP, 0, 5, 0, 1, 2, 3); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VNOR, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VOR, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPERM, 0, 5, 0, 1, 2, 3); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKPX, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSHSS, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSHUS, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSWSS, 0, 5, 0, 1, 2); @@ -494,6 +497,12 @@ void Compiler::RunAllTests() { VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUHS, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUWM, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUWS, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHPX, 0, 5, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHSB, 0, 5, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHSH, 0, 5, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKLPX, 0, 5, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKLSB, 0, 5, 0, 1); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKLSH, 0, 5, 0, 1); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VXOR, 0, 5, 0, 1, 2); // TODO: Rest of the vector instructions From ce0f713f03e748eda69bf64aad200a21e7b55d54 Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Sun, 30 Nov 2014 02:46:29 +0530 Subject: [PATCH 12/13] PPU: Implement more instructions in the recompiler. Fix some instructions in the interpreter. --- rpcs3/Emu/Cell/PPUInterpreter.h | 48 ++++++++++---- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 97 ++++++++++++++++++++++++++-- rpcs3/Emu/Cell/PPULLVMRecompiler.h | 6 ++ 3 files changed, 131 insertions(+), 20 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 76a9991a62..b4f308a2ee 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -4112,7 +4112,7 @@ private: } void MTFSB1(u32 crbd, bool rc) { - u64 mask = (1ULL << crbd); + u64 mask = (1ULL << (31 - crbd)); if ((crbd == 29) && !CPU.FPSCR.NI) LOG_WARNING(PPU, "Non-IEEE mode enabled"); CPU.FPSCR.FPSCR |= mask; @@ -4120,13 +4120,32 @@ private: } void MCRFS(u32 crbd, u32 crbs) { - u64 mask = (1ULL << crbd); - CPU.CR.CR &= ~mask; - CPU.CR.CR |= CPU.FPSCR.FPSCR & mask; + CPU.SetCR(crbd, (CPU.FPSCR.FPSCR >> ((7 - crbs) * 4)) & 0xf); + + switch (crbs) + { + case 0: + CPU.FPSCR.FX = CPU.FPSCR.OX = 0; + break; + case 1: + CPU.FPSCR.UX = CPU.FPSCR.ZX = CPU.FPSCR.XX = CPU.FPSCR.VXSNAN = 0; + break; + case 2: + CPU.FPSCR.VXISI = CPU.FPSCR.VXIDI = CPU.FPSCR.VXZDZ = CPU.FPSCR.VXIMZ = 0; + break; + case 3: + CPU.FPSCR.VXVC = 0; + break; + case 5: + CPU.FPSCR.VXSOFT = CPU.FPSCR.VXSQRT = CPU.FPSCR.VXCVI = 0; + break; + default: + break; + } } void MTFSB0(u32 crbd, bool rc) { - u64 mask = (1ULL << crbd); + u64 mask = (1ULL << (31 - crbd)); if ((crbd == 29) && !CPU.FPSCR.NI) LOG_WARNING(PPU, "Non-IEEE mode disabled"); CPU.FPSCR.FPSCR &= ~mask; @@ -4134,17 +4153,18 @@ private: } void MTFSFI(u32 crfd, u32 i, bool rc) { - u64 mask = (0x1ULL << crfd); + u32 mask = 0xF0000000 >> (crfd * 4); + u32 val = (i & 0xF) << ((7 - crfd) * 4); - if(i) + const u32 oldNI = CPU.FPSCR.NI; + CPU.FPSCR.FPSCR &= ~mask; + CPU.FPSCR.FPSCR |= val; + if (CPU.FPSCR.NI != oldNI) { - if ((crfd == 29) && !CPU.FPSCR.NI) LOG_WARNING(PPU, "Non-IEEE mode enabled"); - CPU.FPSCR.FPSCR |= mask; - } - else - { - if ((crfd == 29) && CPU.FPSCR.NI) LOG_WARNING(PPU, "Non-IEEE mode disabled"); - CPU.FPSCR.FPSCR &= ~mask; + if (oldNI) + LOG_WARNING(PPU, "Non-IEEE mode disabled"); + else + LOG_WARNING(PPU, "Non-IEEE mode enabled"); } if(rc) UNIMPLEMENTED(); diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index b88489bc5e..042f2457f3 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -4305,27 +4305,99 @@ void Compiler::STDU(u32 rs, u32 ra, s32 ds) { } void Compiler::MTFSB1(u32 crbd, bool rc) { - InterpreterCall("MTFSB1", &PPUInterpreter::MTFSB1, crbd, rc); + auto fpscr_i32 = GetFpscr(); + SetBit(fpscr_i32, crbd, m_ir_builder->getInt32(1), false); + SetFpscr(fpscr_i32); + + if (rc) { + // TODO: Implement this + CompilationError("MTFSB1."); + } } void Compiler::MCRFS(u32 crbd, u32 crbs) { - InterpreterCall("MCRFS", &PPUInterpreter::MCRFS, crbd, crbs); + auto fpscr_i32 = GetFpscr(); + auto val_i32 = GetNibble(fpscr_i32, crbs); + SetCrField(crbd, val_i32); + + switch (crbs) { + case 0: + fpscr_i32 = ClrBit(fpscr_i32, 0); + fpscr_i32 = ClrBit(fpscr_i32, 3); + break; + case 1: + fpscr_i32 = ClrNibble(fpscr_i32, 1); + break; + case 2: + fpscr_i32 = ClrNibble(fpscr_i32, 2); + break; + case 3: + fpscr_i32 = ClrBit(fpscr_i32, 12); + break; + case 5: + fpscr_i32 = ClrBit(fpscr_i32, 21); + fpscr_i32 = ClrBit(fpscr_i32, 22); + fpscr_i32 = ClrBit(fpscr_i32, 23); + break; + default: + break; + } + + SetFpscr(fpscr_i32); } void Compiler::MTFSB0(u32 crbd, bool rc) { - InterpreterCall("MTFSB0", &PPUInterpreter::MTFSB0, crbd, rc); + auto fpscr_i32 = GetFpscr(); + fpscr_i32 = ClrBit(fpscr_i32, crbd); + SetFpscr(fpscr_i32); + + if (rc) { + // TODO: Implement this + CompilationError("MTFSB0."); + } } void Compiler::MTFSFI(u32 crfd, u32 i, bool rc) { - InterpreterCall("MTFSFI", &PPUInterpreter::MTFSFI, crfd, i, rc); + auto fpscr_i32 = GetFpscr(); + fpscr_i32 = SetNibble(fpscr_i32, crfd, m_ir_builder->getInt32(i & 0xF)); + SetFpscr(fpscr_i32); + + if (rc) { + // TODO: Implement this + CompilationError("MTFSFI."); + } } void Compiler::MFFS(u32 frd, bool rc) { - InterpreterCall("MFFS", &PPUInterpreter::MFFS, frd, rc); + auto fpscr_i32 = GetFpscr(); + auto fpscr_i64 = m_ir_builder->CreateZExt(fpscr_i32, m_ir_builder->getInt64Ty()); + SetFpr(frd, fpscr_i64); + + if (rc) { + // TODO: Implement this + CompilationError("MFFS."); + } } void Compiler::MTFSF(u32 flm, u32 frb, bool rc) { - InterpreterCall("MTFSF", &PPUInterpreter::MTFSF, flm, frb, rc); + u32 mask = 0; + for(u32 i = 0; i < 8; i++) { + if (flm & (1 << i)) { + mask |= 0xF << (i * 4); + } + } + + auto rb_i32 = GetFpr(frb, 32, true); + auto fpscr_i32 = GetFpscr(); + fpscr_i32 = m_ir_builder->CreateAnd(fpscr_i32, ~mask); + rb_i32 = m_ir_builder->CreateAnd(rb_i32, mask); + fpscr_i32 = m_ir_builder->CreateOr(fpscr_i32, rb_i32); + SetFpscr(fpscr_i32); + + if (rc) { + // TODO: Implement this + CompilationError("MTFSF."); + } } void Compiler::FCMPU(u32 crfd, u32 fra, u32 frb) { @@ -4987,6 +5059,19 @@ void Compiler::SetUsprg0(Value * val_x64) { m_ir_builder->CreateAlignedStore(val_i64, usprg0_i64_ptr, 8); } +Value * Compiler::GetFpscr() { + auto fpscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPSCR)); + auto fpscr_i32_ptr = m_ir_builder->CreateBitCast(fpscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(fpscr_i32_ptr, 4); +} + +void Compiler::SetFpscr(Value * val_x32) { + auto val_i32 = m_ir_builder->CreateBitCast(val_x32, m_ir_builder->getInt32Ty()); + auto fpscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPSCR)); + auto fpscr_i32_ptr = m_ir_builder->CreateBitCast(fpscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + m_ir_builder->CreateAlignedStore(val_i32, fpscr_i32_ptr, 4); +} + Value * Compiler::GetFpr(u32 r, u32 bits, bool as_int) { auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPR[r])); if (!as_int) { diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h index 8249da1c97..7d351638c7 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.h +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h @@ -869,6 +869,12 @@ namespace ppu_recompiler_llvm { /// Set USPRG0 void SetUsprg0(llvm::Value * val_x64); + /// Load FPSCR + llvm::Value * GetFpscr(); + + /// Set FPSCR + void SetFpscr(llvm::Value * val_x32); + /// Get FPR llvm::Value * GetFpr(u32 r, u32 bits = 64, bool as_int = false); From 6cc6ca9f3c9756e5a1db1b085826db4f798adbae Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Mon, 1 Dec 2014 02:09:48 +0530 Subject: [PATCH 13/13] PPU: Implemented more instructions --- rpcs3/Emu/Cell/PPUInterpreter.h | 6 +- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 310 ++++++++++++++++++---- rpcs3/Emu/Cell/PPULLVMRecompiler.h | 14 +- rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp | 51 +++- 4 files changed, 308 insertions(+), 73 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index b4f308a2ee..8a8a187881 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -893,7 +893,7 @@ private: // and between different executions on the same implementation. for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._f[w] = log2(CPU.VPR[vb]._f[w]); + CPU.VPR[vd]._f[w] = log2f(CPU.VPR[vb]._f[w]); } } void VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) @@ -2824,7 +2824,7 @@ private: return; } const u8 eb = (addr & 0xf) >> 1; - vm::write16((u32)addr & 0xFFFFFFFE, CPU.VPR[vs]._u16[7 - eb]); + vm::write16((u32)addr, CPU.VPR[vs]._u16[7 - eb]); } void STDUX(u32 rs, u32 ra, u32 rb) { @@ -2860,7 +2860,7 @@ private: return; } const u8 eb = (addr & 0xf) >> 2; - vm::write32((u32)addr & 0xFFFFFFFC, CPU.VPR[vs]._u32[3 - eb]); + vm::write32((u32)addr, CPU.VPR[vs]._u32[3 - eb]); } void ADDZE(u32 rd, u32 ra, u32 oe, bool rc) { diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 042f2457f3..34b2fb9c05 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -104,8 +104,6 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & auto arg_i = m_state.function->arg_begin(); arg_i->setName("ppu_state"); m_state.args[CompileTaskState::Args::State] = arg_i; - (++arg_i)->setName("interpreter"); - m_state.args[CompileTaskState::Args::Interpreter] = arg_i; (++arg_i)->setName("context"); m_state.args[CompileTaskState::Args::Context] = arg_i; @@ -178,7 +176,7 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & m_ir_builder->SetInsertPoint(then_bb); context_i64 = m_ir_builder->CreateZExt(ret_i32, m_ir_builder->getInt64Ty()); context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); - m_ir_builder->CreateCall3(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], context_i64); + m_ir_builder->CreateCall2(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], context_i64); m_ir_builder->CreateBr(merge_bb); m_ir_builder->SetInsertPoint(merge_bb); @@ -204,7 +202,7 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & m_ir_builder->SetInsertPoint(then_bb); auto context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty()); context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); - m_ir_builder->CreateCall3(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], context_i64); + m_ir_builder->CreateCall2(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], context_i64); m_ir_builder->CreateBr(merge_bb); m_ir_builder->SetInsertPoint(merge_bb); @@ -1098,23 +1096,21 @@ void Compiler::VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) { auto va_v8i32 = m_ir_builder->CreateZExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); auto vb_v8i32 = m_ir_builder->CreateZExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); auto tmp_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32); + auto tmp_v8i64 = m_ir_builder->CreateZExt(tmp_v8i32, VectorType::get(m_ir_builder->getInt64Ty(), 8)); - auto undef_v8i32 = UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)); u32 mask1_v4i32[4] = {0, 2, 4, 6}; - auto tmp1_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); u32 mask2_v4i32[4] = {1, 3, 5, 7}; - auto tmp2_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto tmp1_v4i64 = m_ir_builder->CreateShuffleVector(tmp_v8i64, UndefValue::get(tmp_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + auto tmp2_v4i64 = m_ir_builder->CreateShuffleVector(tmp_v8i64, UndefValue::get(tmp_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); auto vc_v4i32 = GetVrAsIntVec(vc, 32); - auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32); - auto cmp_v4i1 = m_ir_builder->CreateICmpULT(res_v4i32, tmp1_v4i32); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - res_v4i32 = m_ir_builder->CreateOr(res_v4i32, cmp_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32); - cmp_v4i1 = m_ir_builder->CreateICmpULT(res_v4i32, vc_v4i32); - cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - res_v4i32 = m_ir_builder->CreateOr(res_v4i32, cmp_v4i32); - + auto vc_v4i64 = m_ir_builder->CreateZExt(vc_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + auto res_v4i64 = m_ir_builder->CreateAdd(tmp1_v4i64, tmp2_v4i64); + res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, vc_v4i64); + auto gt_v4i1 = m_ir_builder->CreateICmpUGT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0xFFFFFFFF))); + auto gt_v4i64 = m_ir_builder->CreateSExt(gt_v4i1, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + res_v4i64 = m_ir_builder->CreateOr(res_v4i64, gt_v4i64); + auto res_v4i32 = m_ir_builder->CreateTrunc(res_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); SetVr(vd, res_v4i32); // TODO: Set VSCR.SAT @@ -1722,23 +1718,130 @@ void Compiler::VSUBUWS(u32 vd, u32 va, u32 vb) { } void Compiler::VSUMSWS(u32 vd, u32 va, u32 vb) { - InterpreterCall("VSUMSWS", &PPUInterpreter::VSUMSWS, vd, va, vb); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + + auto res_i32 = m_ir_builder->CreateExtractElement(vb_v4i32, m_ir_builder->getInt32(3)); + auto res_i64 = m_ir_builder->CreateSExt(res_i32, m_ir_builder->getInt64Ty()); + for (auto i = 0; i < 4; i++) { + auto va_i32 = m_ir_builder->CreateExtractElement(va_v4i32, m_ir_builder->getInt32(i)); + auto va_i64 = m_ir_builder->CreateSExt(va_i32, m_ir_builder->getInt64Ty()); + res_i64 = m_ir_builder->CreateAdd(res_i64, va_i64); + } + + auto gt_i1 = m_ir_builder->CreateICmpSGT(res_i64, m_ir_builder->getInt64(0x7FFFFFFFull)); + auto lt_i1 = m_ir_builder->CreateICmpSLT(res_i64, m_ir_builder->getInt64(0xFFFFFFFF80000000ull)); + res_i64 = m_ir_builder->CreateSelect(gt_i1, m_ir_builder->getInt64(0x7FFFFFFFull), res_i64); + res_i64 = m_ir_builder->CreateSelect(lt_i1, m_ir_builder->getInt64(0xFFFFFFFF80000000ull), res_i64); + auto res_i128 = m_ir_builder->CreateZExt(res_i64, m_ir_builder->getIntNTy(128)); + + SetVr(vd, res_i128); + + // TODO: Set VSCR.SAT } void Compiler::VSUM2SWS(u32 vd, u32 va, u32 vb) { - InterpreterCall("VSUM2SWS", &PPUInterpreter::VSUM2SWS, vd, va, vb); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + + u32 mask1_v2i32[2] = { 0, 2 }; + u32 mask2_v2i32[2] = { 1, 3 }; + auto va_v4i64 = m_ir_builder->CreateSExt(va_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + auto va1_v2i64 = m_ir_builder->CreateShuffleVector(va_v4i64, UndefValue::get(va_v4i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v2i32)); + auto va2_v2i64 = m_ir_builder->CreateShuffleVector(va_v4i64, UndefValue::get(va_v4i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v2i32)); + auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + auto vb_v2i64 = m_ir_builder->CreateShuffleVector(vb_v4i64, UndefValue::get(vb_v4i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v2i32)); + + auto res_v2i64 = m_ir_builder->CreateAdd(va1_v2i64, va2_v2i64); + res_v2i64 = m_ir_builder->CreateAdd(res_v2i64, vb_v2i64); + auto gt_v2i1 = m_ir_builder->CreateICmpSGT(res_v2i64, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0x7FFFFFFFull))); + auto lt_v2i1 = m_ir_builder->CreateICmpSLT(res_v2i64, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0xFFFFFFFF80000000ull))); + res_v2i64 = m_ir_builder->CreateSelect(gt_v2i1, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0x7FFFFFFFull)), res_v2i64); + res_v2i64 = m_ir_builder->CreateSelect(lt_v2i1, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0x80000000ull)), res_v2i64); + SetVr(vd, res_v2i64); + + // TODO: Set VSCR.SAT } void Compiler::VSUM4SBS(u32 vd, u32 va, u32 vb) { - InterpreterCall("VSUM4SBS", &PPUInterpreter::VSUM4SBS, vd, va, vb); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + + u32 mask1_v4i32[4] = { 0, 4, 8, 12 }; + u32 mask2_v4i32[4] = { 1, 5, 9, 13 }; + u32 mask3_v4i32[4] = { 2, 6, 10, 14 }; + u32 mask4_v4i32[4] = { 3, 7, 11, 15 }; + auto va_v16i64 = m_ir_builder->CreateSExt(va_v16i8, VectorType::get(m_ir_builder->getInt64Ty(), 16)); + auto va1_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + auto va2_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto va3_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32)); + auto va4_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32)); + auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + + auto res_v4i64 = m_ir_builder->CreateAdd(va1_v4i64, va2_v4i64); + res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, va3_v4i64); + res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, va4_v4i64); + res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, vb_v4i64); + auto gt_v4i1 = m_ir_builder->CreateICmpSGT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull))); + auto lt_v4i1 = m_ir_builder->CreateICmpSLT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0xFFFFFFFF80000000ull))); + res_v4i64 = m_ir_builder->CreateSelect(gt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull)), res_v4i64); + res_v4i64 = m_ir_builder->CreateSelect(lt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x80000000ull)), res_v4i64); + auto res_v4i32 = m_ir_builder->CreateTrunc(res_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, res_v4i32); + + // TODO: Set VSCR.SAT } void Compiler::VSUM4SHS(u32 vd, u32 va, u32 vb) { - InterpreterCall("VSUM4SHS", &PPUInterpreter::VSUM4SHS, vd, va, vb); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + + u32 mask1_v4i32[4] = { 0, 2, 4, 6 }; + u32 mask2_v4i32[4] = { 1, 3, 5, 7 }; + auto va_v8i64 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt64Ty(), 8)); + auto va1_v4i64 = m_ir_builder->CreateShuffleVector(va_v8i64, UndefValue::get(va_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + auto va2_v4i64 = m_ir_builder->CreateShuffleVector(va_v8i64, UndefValue::get(va_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + + auto res_v4i64 = m_ir_builder->CreateAdd(va1_v4i64, va2_v4i64); + res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, vb_v4i64); + auto gt_v4i1 = m_ir_builder->CreateICmpSGT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull))); + auto lt_v4i1 = m_ir_builder->CreateICmpSLT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0xFFFFFFFF80000000ull))); + res_v4i64 = m_ir_builder->CreateSelect(gt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull)), res_v4i64); + res_v4i64 = m_ir_builder->CreateSelect(lt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x80000000ull)), res_v4i64); + auto res_v4i32 = m_ir_builder->CreateTrunc(res_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, res_v4i32); + + // TODO: Set VSCR.SAT } void Compiler::VSUM4UBS(u32 vd, u32 va, u32 vb) { - InterpreterCall("VSUM4UBS", &PPUInterpreter::VSUM4UBS, vd, va, vb); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + + u32 mask1_v4i32[4] = { 0, 4, 8, 12 }; + u32 mask2_v4i32[4] = { 1, 5, 9, 13 }; + u32 mask3_v4i32[4] = { 2, 6, 10, 14 }; + u32 mask4_v4i32[4] = { 3, 7, 11, 15 }; + auto va1_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + auto va1_v4i32 = m_ir_builder->CreateZExt(va1_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto va2_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto va2_v4i32 = m_ir_builder->CreateZExt(va2_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto va3_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32)); + auto va3_v4i32 = m_ir_builder->CreateZExt(va3_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto va4_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32)); + auto va4_v4i32 = m_ir_builder->CreateZExt(va4_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + + auto res_v4i32 = m_ir_builder->CreateAdd(va1_v4i32, va2_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, va3_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, va4_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vb_v4i32); + auto lt_v4i1 = m_ir_builder->CreateICmpULT(res_v4i32, vb_v4i32); + auto lt_v4i32 = m_ir_builder->CreateSExt(lt_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + res_v4i32 = m_ir_builder->CreateOr(lt_v4i32, res_v4i32); + SetVr(vd, res_v4i32); + + // TODO: Set VSCR.SAT } void Compiler::VUPKHPX(u32 vd, u32 vb) { @@ -2816,7 +2919,45 @@ void Compiler::STDX(u32 rs, u32 ra, u32 rb) { } void Compiler::STWCX_(u32 rs, u32 ra, u32 rb) { - InterpreterCall("STWCX_", &PPUInterpreter::STWCX_, rs, ra, rb); + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto resv_addr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, R_ADDR)); + auto resv_addr_i64_ptr = m_ir_builder->CreateBitCast(resv_addr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); + auto resv_addr_i64 = (Value *)m_ir_builder->CreateAlignedLoad(resv_addr_i64_ptr, 8); + auto cmp_i1 = m_ir_builder->CreateICmpEQ(addr_i64, resv_addr_i64); + + auto then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "then"); + auto else_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "else"); + auto merge_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "merge"); + m_ir_builder->CreateCondBr(cmp_i1, then_bb, else_bb); + + m_ir_builder->SetInsertPoint(then_bb); + auto rs_i32 = GetGpr(rs, 32); + rs_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, rs_i32->getType()), rs_i32); + resv_addr_i64 = m_ir_builder->CreateAdd(resv_addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); + auto resv_addr_val_i32_ptr = m_ir_builder->CreateIntToPtr(resv_addr_i64, m_ir_builder->getInt32Ty()->getPointerTo()); + auto resv_val_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, R_VALUE)); + auto resv_val_i32_ptr = m_ir_builder->CreateBitCast(resv_val_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + auto resv_val_i32 = m_ir_builder->CreateAlignedLoad(resv_val_i32_ptr, 8); + + auto res_s = m_ir_builder->CreateAtomicCmpXchg(resv_addr_val_i32_ptr, resv_val_i32, rs_i32, AtomicOrdering::AcquireRelease, AtomicOrdering::Monotonic); + auto success_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + auto cr_i32 = GetCr(); + cr_i32 = SetBit(cr_i32, 2, success_i1); + SetCr(cr_i32); + m_ir_builder->CreateAlignedStore(m_ir_builder->getInt64(0), resv_addr_i64_ptr, 8); + m_ir_builder->CreateBr(merge_bb); + + m_ir_builder->SetInsertPoint(else_bb); + cr_i32 = GetCr(); + cr_i32 = ClrBit(cr_i32, 2); + SetCr(cr_i32); + m_ir_builder->CreateBr(merge_bb); + m_ir_builder->SetInsertPoint(merge_bb); } void Compiler::STWX(u32 rs, u32 ra, u32 rb) { @@ -2919,7 +3060,45 @@ void Compiler::SUBFZE(u32 rd, u32 ra, u32 oe, bool rc) { } void Compiler::STDCX_(u32 rs, u32 ra, u32 rb) { - InterpreterCall("STDCX_", &PPUInterpreter::STDCX_, rs, ra, rb); + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto resv_addr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, R_ADDR)); + auto resv_addr_i64_ptr = m_ir_builder->CreateBitCast(resv_addr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); + auto resv_addr_i64 = (Value *)m_ir_builder->CreateAlignedLoad(resv_addr_i64_ptr, 8); + auto cmp_i1 = m_ir_builder->CreateICmpEQ(addr_i64, resv_addr_i64); + + auto then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "then"); + auto else_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "else"); + auto merge_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "merge"); + m_ir_builder->CreateCondBr(cmp_i1, then_bb, else_bb); + + m_ir_builder->SetInsertPoint(then_bb); + auto rs_i64 = GetGpr(rs, 64); + rs_i64 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, rs_i64->getType()), rs_i64); + resv_addr_i64 = m_ir_builder->CreateAdd(resv_addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); + auto resv_addr_val_i64_ptr = m_ir_builder->CreateIntToPtr(resv_addr_i64, m_ir_builder->getInt64Ty()->getPointerTo()); + auto resv_val_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, R_VALUE)); + auto resv_val_i64_ptr = m_ir_builder->CreateBitCast(resv_val_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); + auto resv_val_i64 = m_ir_builder->CreateAlignedLoad(resv_val_i64_ptr, 8); + + auto res_s = m_ir_builder->CreateAtomicCmpXchg(resv_addr_val_i64_ptr, resv_val_i64, rs_i64, AtomicOrdering::AcquireRelease, AtomicOrdering::Monotonic); + auto success_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + auto cr_i32 = GetCr(); + cr_i32 = SetBit(cr_i32, 2, success_i1); + SetCr(cr_i32); + m_ir_builder->CreateAlignedStore(m_ir_builder->getInt64(0), resv_addr_i64_ptr, 8); + m_ir_builder->CreateBr(merge_bb); + + m_ir_builder->SetInsertPoint(else_bb); + cr_i32 = GetCr(); + cr_i32 = ClrBit(cr_i32, 2); + SetCr(cr_i32); + m_ir_builder->CreateBr(merge_bb); + m_ir_builder->SetInsertPoint(merge_bb); } void Compiler::STBX(u32 rs, u32 ra, u32 rb) { @@ -3414,7 +3593,7 @@ void Compiler::LDBRX(u32 rd, u32 ra, u32 rb) { } void Compiler::LSWX(u32 rd, u32 ra, u32 rb) { - InterpreterCall("LSWX", &PPUInterpreter::LSWX, rd, ra, rb); + CompilationError("LSWX"); } void Compiler::LWBRX(u32 rd, u32 ra, u32 rb) { @@ -3543,11 +3722,32 @@ void Compiler::LFDUX(u32 frd, u32 ra, u32 rb) { } void Compiler::STVLX(u32 vs, u32 ra, u32 rb) { - InterpreterCall("STVLX", &PPUInterpreter::STVLX, vs, ra, rb); + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); + auto size_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(16), index_i64); + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFF); + addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); + auto addr_i8_ptr = m_ir_builder->CreateIntToPtr(addr_i64, m_ir_builder->getInt8PtrTy()); + + auto vs_i128 = GetVr(vs); + vs_i128 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, vs_i128->getType()), vs_i128); + auto vs_i128_ptr = m_ir_builder->CreateAlloca(vs_i128->getType()); + vs_i128_ptr->setAlignment(16); + m_ir_builder->CreateAlignedStore(vs_i128, vs_i128_ptr, 16); + auto vs_i8_ptr = m_ir_builder->CreateBitCast(vs_i128_ptr, m_ir_builder->getInt8PtrTy()); + + Type * types[3] = { m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt64Ty() }; + m_ir_builder->CreateCall5(Intrinsic::getDeclaration(m_module, Intrinsic::memcpy, types), + addr_i8_ptr, vs_i8_ptr, size_i64, m_ir_builder->getInt32(1), m_ir_builder->getInt1(false)); } void Compiler::STSWX(u32 rs, u32 ra, u32 rb) { - InterpreterCall("STSWX", &PPUInterpreter::STSWX, rs, ra, rb); + CompilationError("STSWX"); } void Compiler::STWBRX(u32 rs, u32 ra, u32 rb) { @@ -3572,7 +3772,29 @@ void Compiler::STFSX(u32 frs, u32 ra, u32 rb) { } void Compiler::STVRX(u32 vs, u32 ra, u32 rb) { - InterpreterCall("STVRX", &PPUInterpreter::STVRX, vs, ra, rb); + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto size_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); + auto index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(16), size_i64); + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFF0); + addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); + auto addr_i8_ptr = m_ir_builder->CreateIntToPtr(addr_i64, m_ir_builder->getInt8PtrTy()); + + auto vs_i128 = GetVr(vs); + vs_i128 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, vs_i128->getType()), vs_i128); + auto vs_i128_ptr = m_ir_builder->CreateAlloca(vs_i128->getType()); + vs_i128_ptr->setAlignment(16); + m_ir_builder->CreateAlignedStore(vs_i128, vs_i128_ptr, 16); + auto vs_i8_ptr = m_ir_builder->CreateBitCast(vs_i128_ptr, m_ir_builder->getInt8PtrTy()); + vs_i8_ptr = m_ir_builder->CreateGEP(vs_i8_ptr, index_i64); + + Type * types[3] = { m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt64Ty() }; + m_ir_builder->CreateCall5(Intrinsic::getDeclaration(m_module, Intrinsic::memcpy, types), + addr_i8_ptr, vs_i8_ptr, size_i64, m_ir_builder->getInt32(1), m_ir_builder->getInt1(false)); } void Compiler::STFSUX(u32 frs, u32 ra, u32 rb) { @@ -4306,7 +4528,7 @@ void Compiler::STDU(u32 rs, u32 ra, s32 ds) { void Compiler::MTFSB1(u32 crbd, bool rc) { auto fpscr_i32 = GetFpscr(); - SetBit(fpscr_i32, crbd, m_ir_builder->getInt32(1), false); + fpscr_i32 = SetBit(fpscr_i32, crbd, m_ir_builder->getInt32(1), false); SetFpscr(fpscr_i32); if (rc) { @@ -5241,7 +5463,7 @@ void Compiler::CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i32, bool auto switch_instr = m_ir_builder->CreateSwitch(target_i32, unknown_function_block); m_ir_builder->SetInsertPoint(unknown_function_block); - m_ir_builder->CreateCall3(m_execute_unknown_function, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], m_ir_builder->getInt64(0)); + m_ir_builder->CreateCall2(m_execute_unknown_function, m_state.args[CompileTaskState::Args::State], m_ir_builder->getInt64(0)); m_ir_builder->CreateBr(next_block); auto call_i = m_state.cfg->calls.find(m_state.current_instruction_address); @@ -5370,18 +5592,6 @@ void Compiler::WriteMemory(Value * addr_i64, Value * val_ix, u32 alignment, bool } } -template -Value * Compiler::InterpreterCall(const char * name, Func function, Args... args) { - auto i = m_stats.interpreter_fallback_stats.find(name); - if (i == m_stats.interpreter_fallback_stats.end()) { - i = m_stats.interpreter_fallback_stats.insert(m_stats.interpreter_fallback_stats.end(), std::make_pair(name, 0)); - } - - i->second++; - - return Call(name, function, m_state.args[CompileTaskState::Args::Interpreter], m_ir_builder->getInt32(args)...); -} - template Type * Compiler::CppToLlvmType() { if (std::is_void::value) { @@ -5428,7 +5638,7 @@ llvm::Value * Compiler::IndirectCall(u32 address, Value * context_i64, bool is_f auto location_i64_ptr = m_ir_builder->CreateIntToPtr(location_i64, m_ir_builder->getInt64Ty()->getPointerTo()); auto executable_i64 = m_ir_builder->CreateLoad(location_i64_ptr); auto executable_ptr = m_ir_builder->CreateIntToPtr(executable_i64, m_compiled_function_type->getPointerTo()); - return m_ir_builder->CreateCall3(executable_ptr, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], context_i64); + return m_ir_builder->CreateCall2(executable_ptr, m_state.args[CompileTaskState::Args::State], context_i64); } void Compiler::CompilationError(const std::string & error) { @@ -5601,10 +5811,6 @@ void RecompilationEngine::Task() { Log() << " Time spent idling = " << idling_time.count() / 1000000 << "ms\n"; Log() << " Time spent doing misc tasks = " << (total_time.count() - idling_time.count() - compiler_stats.total_time.count()) / 1000000 << "ms\n"; Log() << "Ordinals allocated = " << m_next_ordinal << "\n"; - Log() << "\nInterpreter fallback stats:\n"; - for (auto i = compiler_stats.interpreter_fallback_stats.begin(); i != compiler_stats.interpreter_fallback_stats.end(); i++) { - Log() << i->first << " = " << i->second << "\n"; - } LOG_NOTICE(PPU, "PPU LLVM Recompilation thread exiting."); s_the_instance = nullptr; // Can cause deadlock if this is the last instance. Need to fix this. @@ -5813,7 +6019,7 @@ ppu_recompiler_llvm::ExecutionEngine::~ExecutionEngine() { } u8 ppu_recompiler_llvm::ExecutionEngine::DecodeMemory(const u32 address) { - ExecuteFunction(&m_ppu, m_interpreter, 0); + ExecuteFunction(&m_ppu, 0); return 0; } @@ -5854,13 +6060,13 @@ Executable ppu_recompiler_llvm::ExecutionEngine::GetExecutable(u32 address, Exec return executable; } -u32 ppu_recompiler_llvm::ExecutionEngine::ExecuteFunction(PPUThread * ppu_state, PPUInterpreter * interpreter, u64 context) { +u32 ppu_recompiler_llvm::ExecutionEngine::ExecuteFunction(PPUThread * ppu_state, u64 context) { auto execution_engine = (ExecutionEngine *)ppu_state->GetDecoder(); execution_engine->m_tracer.Trace(Tracer::TraceType::EnterFunction, ppu_state->PC, 0); - return ExecuteTillReturn(ppu_state, interpreter, 0); + return ExecuteTillReturn(ppu_state, 0); } -u32 ppu_recompiler_llvm::ExecutionEngine::ExecuteTillReturn(PPUThread * ppu_state, PPUInterpreter * interpreter, u64 context) { +u32 ppu_recompiler_llvm::ExecutionEngine::ExecuteTillReturn(PPUThread * ppu_state, u64 context) { auto execution_engine = (ExecutionEngine *)ppu_state->GetDecoder(); auto terminate = false; auto branch_type = BranchType::NonBranch; @@ -5878,7 +6084,7 @@ u32 ppu_recompiler_llvm::ExecutionEngine::ExecuteTillReturn(PPUThread * ppu_stat auto executable = execution_engine->GetExecutable(ppu_state->PC, ExecuteTillReturn); if (executable != ExecuteTillReturn && executable != ExecuteFunction) { auto entry = ppu_state->PC; - auto exit = (u32)executable(ppu_state, interpreter, 0); + auto exit = (u32)executable(ppu_state, 0); execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledBlock, entry, exit); if (exit == 0) { terminate = true; @@ -5898,7 +6104,7 @@ u32 ppu_recompiler_llvm::ExecutionEngine::ExecuteTillReturn(PPUThread * ppu_stat case BranchType::FunctionCall: execution_engine->m_tracer.Trace(Tracer::TraceType::CallFunction, ppu_state->PC, 0); executable = execution_engine->GetExecutable(ppu_state->PC, ExecuteFunction); - executable(ppu_state, interpreter, 0); + executable(ppu_state, 0); break; case BranchType::LocalBranch: break; diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h index 7d351638c7..1d656cc4a1 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.h +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h @@ -253,7 +253,7 @@ namespace ppu_recompiler_llvm { }; /// Pointer to an executable - typedef u32(*Executable)(PPUThread * ppu_state, PPUInterpreter * interpreter, u64 context); + typedef u32(*Executable)(PPUThread * ppu_state, u64 context); /// PPU compiler that uses LLVM for code generation and optimization class Compiler : protected PPUOpcodes, protected PPCDecoder { @@ -270,9 +270,6 @@ namespace ppu_recompiler_llvm { /// Total time std::chrono::nanoseconds total_time; - - /// Contains the number of times interpreter fallback was used - std::map interpreter_fallback_stats; }; Compiler(RecompilationEngine & recompilation_engine, const Executable execute_unknown_function, const Executable execute_unknown_block); @@ -705,7 +702,6 @@ namespace ppu_recompiler_llvm { struct CompileTaskState { enum Args { State, - Interpreter, Context, MaxArgs, }; @@ -914,10 +910,6 @@ namespace ppu_recompiler_llvm { /// Write to memory void WriteMemory(llvm::Value * addr_i64, llvm::Value * val_ix, u32 alignment = 0, bool bswap = true, bool could_be_mmio = true); - /// Call an interpreter function - template - llvm::Value * InterpreterCall(const char * name, Func function, Args... args); - /// Convert a C++ type to an LLVM type template llvm::Type * CppToLlvmType(); @@ -1166,10 +1158,10 @@ namespace ppu_recompiler_llvm { Executable GetExecutable(u32 address, Executable default_executable) const; /// Execute a function - static u32 ExecuteFunction(PPUThread * ppu_state, PPUInterpreter * interpreter, u64 context); + static u32 ExecuteFunction(PPUThread * ppu_state, u64 context); /// Execute till the current function returns - static u32 ExecuteTillReturn(PPUThread * ppu_state, PPUInterpreter * interpreter, u64 context); + static u32 ExecuteTillReturn(PPUThread * ppu_state, u64 context); }; /// Get the branch type from a branch instruction diff --git a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp index f4c3319428..f30a713c09 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp @@ -243,8 +243,6 @@ void Compiler::RunTest(const char * name, std::function test_case, std:: auto arg_i = m_state.function->arg_begin(); arg_i->setName("ppu_state"); m_state.args[CompileTaskState::Args::State] = arg_i; - (++arg_i)->setName("interpreter"); - m_state.args[CompileTaskState::Args::Interpreter] = arg_i; (++arg_i)->setName("context"); m_state.args[CompileTaskState::Args::Context] = arg_i; m_state.current_instruction_address = s_ppu_state->PC; @@ -265,7 +263,7 @@ void Compiler::RunTest(const char * name, std::function test_case, std:: std::string verify_results; raw_string_ostream verify_results_ostream(verify_results); if (verifyFunction(*m_state.function, &verify_results_ostream)) { - m_recompilation_engine.Log() << "Verification Failed:\n" << verify_results; + m_recompilation_engine.Log() << "Verification Failed:\n" << verify_results << '\n'; return; } @@ -298,7 +296,7 @@ void Compiler::RunTest(const char * name, std::function test_case, std:: // Run the test input(); auto executable = (Executable)m_execution_engine->getPointerToFunction(m_state.function); - executable(s_ppu_state, s_interpreter, 0); + executable(s_ppu_state, 0); // Verify results std::string msg; @@ -497,6 +495,11 @@ void Compiler::RunAllTests() { VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUHS, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUWM, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUWS, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUMSWS, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUM2SWS, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUM4SBS, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUM4SHS, 0, 5, 0, 1, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUM4UBS, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHPX, 0, 5, 0, 1); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHSB, 0, 5, 0, 1); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHSH, 0, 5, 0, 1); @@ -672,6 +675,27 @@ void Compiler::RunAllTests() { VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMSUBS, 0, 5, 0, 1, 2, 3, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMSUBS, 0, 5, 0, 1, 2, 3, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMADDS, 0, 5, 0, 1, 2, 3, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB1, 0, 5, 0, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB1, 5, 5, 3, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB1, 10, 5, 25, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB1, 15, 5, 31, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRFS, 0, 5, 0, 7); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRFS, 5, 5, 7, 0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRFS, 10, 5, 5, 2); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRFS, 15, 5, 5, 3); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB0, 0, 5, 0, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB0, 5, 5, 3, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB0, 10, 5, 25, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB0, 15, 5, 31, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSFI, 0, 5, 0, 1, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSFI, 5, 5, 2, 6, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSFI, 10, 5, 5, 11, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSFI, 15, 5, 7, 14, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFFS, 0, 5, 0, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSF, 0, 5, 0, 0, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSF, 5, 5, 2, 0, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSF, 10, 5, 5, 0, false); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSF, 15, 5, 7, 0, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCMPU, 0, 5, 5, 0, 1); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FRSP, 0, 5, 0, 1, false); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCTIW, 0, 5, 0, 1, false); @@ -697,9 +721,12 @@ void Compiler::RunAllTests() { PPUState input; input.SetRandom(0x10000); - input.GPR[14] = 10; - input.GPR[21] = 15; - input.GPR[23] = 0x10000; + input.GPR[14] = 10; + input.GPR[21] = 15; + input.GPR[23] = 0x10000; + input.R_ADDR = 0x10000; + input.R_VALUE = 0x1122334455667788; + input.mem_block[0] = 0x8877665544332211; VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZ, 0, input, 5, 0, 0x10000); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZ, 1, input, 5, 14, 0x10000); @@ -792,6 +819,8 @@ void Compiler::RunAllTests() { VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STB, 0, input, 3, 0, 0x10000); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STB, 1, input, 3, 14, 0x10000); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBU, 0, input, 3, 14, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDCX_, 0, input, 3, 0, 23); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDCX_, 1, input, 3, 14, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBX, 0, input, 3, 0, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBX, 1, input, 3, 14, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBUX, 0, input, 3, 14, 23); @@ -810,18 +839,26 @@ void Compiler::RunAllTests() { VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWX, 0, input, 3, 0, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWX, 1, input, 3, 14, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWUX, 0, input, 3, 14, 23); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVLX, 0, input, 0, 0, 23); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVLX, 1, input, 0, 14, 23); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVLX, 2, input, 0, 21, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWBRX, 0, input, 3, 14, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STD, 0, input, 3, 0, 0x10000); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STD, 1, input, 3, 14, 0x10000); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDU, 0, input, 3, 14, 0x10000); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDX, 0, input, 3, 0, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDX, 1, input, 3, 14, 23); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWCX_, 0, input, 3, 0, 23); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWCX_, 1, input, 3, 14, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDUX, 0, input, 3, 14, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFS, 0, input, 3, 0, 0x10000); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFS, 1, input, 3, 14, 0x10000); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSU, 0, input, 3, 14, 0x10000); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSX, 0, input, 3, 0, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSX, 1, input, 3, 14, 23); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVRX, 0, input, 0, 0, 23); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVRX, 1, input, 0, 14, 23); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVRX, 2, input, 0, 21, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSUX, 0, input, 3, 14, 23); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFD, 0, input, 3, 0, 0x10000); VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFD, 1, input, 3, 14, 0x10000);