From 794cd4cf89be0e360b0b369b84a3bb13974a3723 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Thu, 28 Nov 2013 02:15:45 +0400 Subject: [PATCH 1/9] cellGcmSetUserHandler (empty), SPU.SF syntax Test commit --- rpcs3/Emu/Cell/SPUInterpreter.h | 8 ++++---- rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp | 7 +++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index 234329da47..f12b7185ea 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -72,10 +72,10 @@ private: } void SF(u32 rt, u32 ra, u32 rb) { - CPU.GPR[rt]._u32[0] = CPU.GPR[rb]._u32[0] + ~CPU.GPR[ra]._u32[0] + 1; - CPU.GPR[rt]._u32[1] = CPU.GPR[rb]._u32[1] + ~CPU.GPR[ra]._u32[1] + 1; - CPU.GPR[rt]._u32[2] = CPU.GPR[rb]._u32[2] + ~CPU.GPR[ra]._u32[2] + 1; - CPU.GPR[rt]._u32[3] = CPU.GPR[rb]._u32[3] + ~CPU.GPR[ra]._u32[3] + 1; + CPU.GPR[rt]._u32[0] = CPU.GPR[rb]._u32[0] - CPU.GPR[ra]._u32[0]; + CPU.GPR[rt]._u32[1] = CPU.GPR[rb]._u32[1] - CPU.GPR[ra]._u32[1]; + CPU.GPR[rt]._u32[2] = CPU.GPR[rb]._u32[2] - CPU.GPR[ra]._u32[2]; + CPU.GPR[rt]._u32[3] = CPU.GPR[rb]._u32[3] - CPU.GPR[ra]._u32[3]; } void OR(u32 rt, u32 ra, u32 rb) { diff --git a/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp b/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp index 9c7e503c2c..bfef39781b 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp @@ -284,6 +284,12 @@ u32 cellGcmGetTiledPitchSize(u32 size) return size; } +u32 cellGcmSetUserHandler(u32 handler) +{ + cellGcmSys.Warning("cellGcmSetUserHandler(handler=0x%x)", handler); + return handler; +} + u32 cellGcmGetDefaultCommandWordSize() { cellGcmSys.Warning("cellGcmGetDefaultCommandWordSize()"); @@ -546,6 +552,7 @@ int cellGcmSetSecondVFrequency (u32 freq) void cellGcmSys_init() { cellGcmSys.AddFunc(0x055bd74d, cellGcmGetTiledPitchSize); + cellGcmSys.AddFunc(0x06edea9e, cellGcmSetUserHandler); cellGcmSys.AddFunc(0x15bae46b, cellGcmInit); cellGcmSys.AddFunc(0x21397818, cellGcmSetFlipCommand); cellGcmSys.AddFunc(0x21ac3697, cellGcmAddressToOffset); From c48168ffc7c4653b7d681df28a6e6b775684026e Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Fri, 29 Nov 2013 19:27:10 +0400 Subject: [PATCH 2/9] SPU ELF loading I'm not sure that it's correct way to get entry point --- rpcs3/Emu/Cell/SPUThread.cpp | 2 +- rpcs3/Emu/System.cpp | 3 ++- rpcs3/Loader/ELF.cpp | 1 + rpcs3/Loader/ELF32.cpp | 2 ++ rpcs3/Loader/ELF64.cpp | 3 +++ rpcs3/Loader/Loader.cpp | 1 + rpcs3/Loader/Loader.h | 3 +++ 7 files changed, 13 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 8bc5a30dbc..2faacb49bd 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -51,7 +51,7 @@ void SPUThread::InitRegs() SPU.Status.SetValue(SPU_STATUS_RUNNING); Prxy.QueryType.SetValue(0); MFC.CMDStatus.SetValue(0); - PC = SPU.NPC.GetValue(); + //PC = SPU.NPC.GetValue(); } u64 SPUThread::GetFreeStackSize() const diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index d81b93dcae..3a0fb8b4e8 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -185,7 +185,8 @@ void Emulator::Load() ConLog.Write("max addr = 0x%x", l.GetMaxAddr()); thread.SetOffset(Memory.MainMem.GetStartAddr()); Memory.MainMem.Alloc(Memory.MainMem.GetStartAddr() + l.GetMaxAddr(), 0xFFFFED - l.GetMaxAddr()); - thread.SetEntry(l.GetEntry() - Memory.MainMem.GetStartAddr()); + //thread.SetEntry(l.GetEntry() - Memory.MainMem.GetStartAddr()); + thread.SetEntry(l.GetTextEntry()); break; case MACHINE_PPC64: diff --git a/rpcs3/Loader/ELF.cpp b/rpcs3/Loader/ELF.cpp index d967e4583e..fd1526e665 100644 --- a/rpcs3/Loader/ELF.cpp +++ b/rpcs3/Loader/ELF.cpp @@ -27,6 +27,7 @@ bool ELFLoader::LoadInfo() entry = loader->GetEntry(); machine = loader->GetMachine(); + _text_section_offset = loader->GetTextEntry(); return true; } diff --git a/rpcs3/Loader/ELF32.cpp b/rpcs3/Loader/ELF32.cpp index 6684a4f584..c022de7d58 100644 --- a/rpcs3/Loader/ELF32.cpp +++ b/rpcs3/Loader/ELF32.cpp @@ -135,6 +135,8 @@ bool ELF32Loader::LoadShdrInfo() name += c; } shdr_name_arr.Add(name); + if(name == ".text") //temporary solution for SPU ELF loading + _text_section_offset = shdr_arr[i].sh_offset; } return true; diff --git a/rpcs3/Loader/ELF64.cpp b/rpcs3/Loader/ELF64.cpp index e03ac6cadd..be47e93813 100644 --- a/rpcs3/Loader/ELF64.cpp +++ b/rpcs3/Loader/ELF64.cpp @@ -186,6 +186,8 @@ bool ELF64Loader::LoadShdrInfo(s64 offset) } shdr_name_arr.Add(name); + if(name == ".text") + _text_section_offset = shdr_arr[i].sh_offset; } return true; @@ -371,6 +373,7 @@ bool ELF64Loader::LoadPhdrData(u64 offset) if(!module->Load(nid)) { ConLog.Warning("Unknown function 0x%08x in '%s' module", nid, module_name.mb_str()); + SysCalls::DoFunc(nid); } } #ifdef LOADER_DEBUG diff --git a/rpcs3/Loader/Loader.cpp b/rpcs3/Loader/Loader.cpp index 86f8ebd326..5725ef0dd2 100644 --- a/rpcs3/Loader/Loader.cpp +++ b/rpcs3/Loader/Loader.cpp @@ -147,6 +147,7 @@ bool Loader::Analyze() machine = m_loader->GetMachine(); entry = m_loader->GetMachine() == MACHINE_SPU ? m_loader->GetEntry() + g_spu_offset : m_loader->GetEntry(); + _text_section_offset = m_loader->GetTextEntry(); return true; } diff --git a/rpcs3/Loader/Loader.h b/rpcs3/Loader/Loader.h index ab5c974c20..ee04d50911 100644 --- a/rpcs3/Loader/Loader.h +++ b/rpcs3/Loader/Loader.h @@ -181,12 +181,14 @@ protected: u32 min_addr; u32 max_addr; Elf_Machine machine; + u32 _text_section_offset; LoaderBase() : machine(MACHINE_Unknown) , entry(0) , min_addr(0) , max_addr(0) + , _text_section_offset(0) { } @@ -196,6 +198,7 @@ public: Elf_Machine GetMachine() { return machine; } u32 GetEntry() { return entry; } + u32 GetTextEntry() { return _text_section_offset; } u32 GetMinAddr() { return min_addr; } u32 GetMaxAddr() { return min_addr; } }; From 384e20acf023b5d8ca26efd89e4d2e4c89ca3167 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sat, 30 Nov 2013 03:40:31 +0400 Subject: [PATCH 3/9] SPU fixes Partially tested and fixed --- rpcs3/Emu/Cell/SPUInterpreter.h | 65 +++++++++++++++++++++------------ rpcs3/Emu/Cell/SPUThread.cpp | 2 +- rpcs3/Loader/ELF64.cpp | 1 + 3 files changed, 43 insertions(+), 25 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index f12b7185ea..e61416980f 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -555,10 +555,10 @@ private: } void CDD(u32 rt, u32 ra, s32 i7) { - const int t = ((CPU.GPR[ra]._u32[0] + i7) & 0xf) >> 3; + const int t = (((CPU.GPR[ra]._u32[0] + i7) & 0xf) >> 3) ^ 1; - for (int i=0; i<16; ++i) - CPU.GPR[rt]._u8[i] = 0x10 + i; + CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; + CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; CPU.GPR[rt]._u64[t] = (u64)0x0001020304050607; } @@ -1163,9 +1163,9 @@ private: CPU.GPR[rt]._u16[i] = CPU.GPR[ra]._u16[i] + i10; } } - void STQD(u32 rt, s32 i10, u32 ra) + void STQD(u32 rt, s32 i10, u32 ra) //hello_world addr=0x178, value won't be saved { - u32 lsa = CPU.GPR[ra]._u32[3] + i10; + const u32 lsa = (CPU.GPR[ra]._u32[3] + (i10 << 4)) & 0xFFFFFFF0; if(!CPU.IsGoodLSA(lsa)) { ConLog.Error("STQD: bad lsa (0x%x)", lsa); @@ -1176,7 +1176,7 @@ private: } void LQD(u32 rt, s32 i10, u32 ra) { - u32 lsa = CPU.GPR[ra]._u32[3] + i10; + const u32 lsa = (CPU.GPR[ra]._u32[3] + (i10 << 4)) & 0xFFFFFFF0; if(!CPU.IsGoodLSA(lsa)) { ConLog.Error("LQD: bad lsa (0x%x)", lsa); @@ -1224,14 +1224,14 @@ private: { for(u32 i = 0; i < 4; ++i) { - CPU.GPR[rt]._u32[i] = (CPU.GPR[rt]._u32[i] > (u32)i10) ? 0xffffffff : 0x00000000; + CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._u32[i] > (u32)i10) ? 0xffffffff : 0x00000000; } } void CLGTHI(u32 rt, u32 ra, s32 i10) { for(u32 i = 0; i < 8; ++i) { - CPU.GPR[rt]._u16[i] = (CPU.GPR[rt]._u16[i] > (u16)i10) ? 0xffff : 0x0000; + CPU.GPR[rt]._u16[i] = (CPU.GPR[ra]._u16[i] > (u16)i10) ? 0xffff : 0x0000; } } void CLGTBI(u32 rt, u32 ra, s32 i10) @@ -1246,18 +1246,18 @@ private: void MPYI(u32 rt, u32 ra, s32 i10) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._i32[w] = CPU.GPR[rt]._i16[w*2 + 1] * i10; + CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i16[w*2 + 1] * i10; } void MPYUI(u32 rt, u32 ra, s32 i10) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._u32[w] = CPU.GPR[rt]._u16[w*2 + 1] * (u16)(i10 & 0xffff); + CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u16[w*2 + 1] * (u16)(i10 & 0xffff); } void CEQI(u32 rt, u32 ra, s32 i10) { for(u32 i = 0; i < 4; ++i) { - CPU.GPR[rt]._u32[i] = (CPU.GPR[rt]._u32[i] == (u32)i10) ? 0xffffffff : 0x00000000; + CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._u32[i] == (u32)i10) ? 0xffffffff : 0x00000000; } } void CEQHI(u32 rt, u32 ra, s32 i10) @@ -1304,7 +1304,24 @@ private: } void SHUFB(u32 rt, u32 ra, u32 rb, u32 rc) { - //ConLog.Warning("SHUFB"); + for (int i = 0; i < 16; i++) + { + u8 b = CPU.GPR[rc]._u8[i]; + if(b & 0x80) { + if(b & 0x40) { + if(b & 0x20) + CPU.GPR[rt]._u8[i] = 0x80; + else + CPU.GPR[rt]._u8[i] = 0xFF; + } else + CPU.GPR[rt]._u8[i] = 0x00; + } else { + if(b & 0x10) + CPU.GPR[rt]._u8[i] = CPU.GPR[rb]._u8[15 - (b & 0x0F)]; + else + CPU.GPR[rt]._u8[i] = CPU.GPR[ra]._u8[15 - (b & 0x0F)]; + } + } } void MPYA(u32 rc, u32 ra, u32 rb, u32 rt) { @@ -1313,24 +1330,24 @@ private: } void FNMS(u32 rt, u32 ra, u32 rb, u32 rc) { - CPU.GPR[rt]._f[0] -= CPU.GPR[ra]._f[0] * CPU.GPR[rb]._f[0]; - CPU.GPR[rt]._f[1] -= CPU.GPR[ra]._f[1] * CPU.GPR[rb]._f[1]; - CPU.GPR[rt]._f[2] -= CPU.GPR[ra]._f[2] * CPU.GPR[rb]._f[2]; - CPU.GPR[rt]._f[3] -= CPU.GPR[ra]._f[3] * CPU.GPR[rb]._f[3]; + CPU.GPR[rt]._f[0] = CPU.GPR[rc]._f[0] - CPU.GPR[ra]._f[0] * CPU.GPR[rb]._f[0]; + CPU.GPR[rt]._f[1] = CPU.GPR[rc]._f[1] - CPU.GPR[ra]._f[1] * CPU.GPR[rb]._f[1]; + CPU.GPR[rt]._f[2] = CPU.GPR[rc]._f[2] - CPU.GPR[ra]._f[2] * CPU.GPR[rb]._f[2]; + CPU.GPR[rt]._f[3] = CPU.GPR[rc]._f[3] - CPU.GPR[ra]._f[3] * CPU.GPR[rb]._f[3]; } void FMA(u32 rc, u32 ra, u32 rb, u32 rt) { - CPU.GPR[rt]._f[0] += CPU.GPR[ra]._f[0] * CPU.GPR[rb]._f[0]; - CPU.GPR[rt]._f[1] += CPU.GPR[ra]._f[1] * CPU.GPR[rb]._f[1]; - CPU.GPR[rt]._f[2] += CPU.GPR[ra]._f[2] * CPU.GPR[rb]._f[2]; - CPU.GPR[rt]._f[3] += CPU.GPR[ra]._f[3] * CPU.GPR[rb]._f[3]; + CPU.GPR[rt]._f[0] = CPU.GPR[ra]._f[0] * CPU.GPR[rb]._f[0] + CPU.GPR[rc]._f[0]; + CPU.GPR[rt]._f[1] = CPU.GPR[ra]._f[1] * CPU.GPR[rb]._f[1] + CPU.GPR[rc]._f[1]; + CPU.GPR[rt]._f[2] = CPU.GPR[ra]._f[2] * CPU.GPR[rb]._f[2] + CPU.GPR[rc]._f[2]; + CPU.GPR[rt]._f[3] = CPU.GPR[ra]._f[3] * CPU.GPR[rb]._f[3] + CPU.GPR[rc]._f[3]; } void FMS(u32 rc, u32 ra, u32 rb, u32 rt) { - CPU.GPR[rt]._f[0] = CPU.GPR[ra]._f[0] * CPU.GPR[rb]._f[0] - CPU.GPR[rt]._f[0]; - CPU.GPR[rt]._f[1] = CPU.GPR[ra]._f[1] * CPU.GPR[rb]._f[1] - CPU.GPR[rt]._f[1]; - CPU.GPR[rt]._f[2] = CPU.GPR[ra]._f[2] * CPU.GPR[rb]._f[2] - CPU.GPR[rt]._f[2]; - CPU.GPR[rt]._f[3] = CPU.GPR[ra]._f[3] * CPU.GPR[rb]._f[3] - CPU.GPR[rt]._f[3]; + CPU.GPR[rt]._f[0] = CPU.GPR[ra]._f[0] * CPU.GPR[rb]._f[0] - CPU.GPR[rc]._f[0]; + CPU.GPR[rt]._f[1] = CPU.GPR[ra]._f[1] * CPU.GPR[rb]._f[1] - CPU.GPR[rc]._f[1]; + CPU.GPR[rt]._f[2] = CPU.GPR[ra]._f[2] * CPU.GPR[rb]._f[2] - CPU.GPR[rc]._f[2]; + CPU.GPR[rt]._f[3] = CPU.GPR[ra]._f[3] * CPU.GPR[rb]._f[3] - CPU.GPR[rc]._f[3]; } void UNK(u32 code, u32 opcode, u32 gcode) diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 8bc5a30dbc..2faacb49bd 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -51,7 +51,7 @@ void SPUThread::InitRegs() SPU.Status.SetValue(SPU_STATUS_RUNNING); Prxy.QueryType.SetValue(0); MFC.CMDStatus.SetValue(0); - PC = SPU.NPC.GetValue(); + //PC = SPU.NPC.GetValue(); } u64 SPUThread::GetFreeStackSize() const diff --git a/rpcs3/Loader/ELF64.cpp b/rpcs3/Loader/ELF64.cpp index e03ac6cadd..f0406e0aea 100644 --- a/rpcs3/Loader/ELF64.cpp +++ b/rpcs3/Loader/ELF64.cpp @@ -371,6 +371,7 @@ bool ELF64Loader::LoadPhdrData(u64 offset) if(!module->Load(nid)) { ConLog.Warning("Unknown function 0x%08x in '%s' module", nid, module_name.mb_str()); + SysCalls::DoFunc(nid); } } #ifdef LOADER_DEBUG From b217742c7b177a01926d755742adabbb9a200a6f Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sat, 30 Nov 2013 03:50:43 +0400 Subject: [PATCH 4/9] Merge fix --- rpcs3/Emu/System.cpp | 3 +-- rpcs3/Loader/ELF.cpp | 1 - rpcs3/Loader/ELF32.cpp | 2 -- rpcs3/Loader/ELF64.cpp | 2 -- rpcs3/Loader/Loader.cpp | 1 - rpcs3/Loader/Loader.h | 3 --- 6 files changed, 1 insertion(+), 11 deletions(-) diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index 3a0fb8b4e8..d81b93dcae 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -185,8 +185,7 @@ void Emulator::Load() ConLog.Write("max addr = 0x%x", l.GetMaxAddr()); thread.SetOffset(Memory.MainMem.GetStartAddr()); Memory.MainMem.Alloc(Memory.MainMem.GetStartAddr() + l.GetMaxAddr(), 0xFFFFED - l.GetMaxAddr()); - //thread.SetEntry(l.GetEntry() - Memory.MainMem.GetStartAddr()); - thread.SetEntry(l.GetTextEntry()); + thread.SetEntry(l.GetEntry() - Memory.MainMem.GetStartAddr()); break; case MACHINE_PPC64: diff --git a/rpcs3/Loader/ELF.cpp b/rpcs3/Loader/ELF.cpp index fd1526e665..d967e4583e 100644 --- a/rpcs3/Loader/ELF.cpp +++ b/rpcs3/Loader/ELF.cpp @@ -27,7 +27,6 @@ bool ELFLoader::LoadInfo() entry = loader->GetEntry(); machine = loader->GetMachine(); - _text_section_offset = loader->GetTextEntry(); return true; } diff --git a/rpcs3/Loader/ELF32.cpp b/rpcs3/Loader/ELF32.cpp index c022de7d58..6684a4f584 100644 --- a/rpcs3/Loader/ELF32.cpp +++ b/rpcs3/Loader/ELF32.cpp @@ -135,8 +135,6 @@ bool ELF32Loader::LoadShdrInfo() name += c; } shdr_name_arr.Add(name); - if(name == ".text") //temporary solution for SPU ELF loading - _text_section_offset = shdr_arr[i].sh_offset; } return true; diff --git a/rpcs3/Loader/ELF64.cpp b/rpcs3/Loader/ELF64.cpp index be47e93813..f0406e0aea 100644 --- a/rpcs3/Loader/ELF64.cpp +++ b/rpcs3/Loader/ELF64.cpp @@ -186,8 +186,6 @@ bool ELF64Loader::LoadShdrInfo(s64 offset) } shdr_name_arr.Add(name); - if(name == ".text") - _text_section_offset = shdr_arr[i].sh_offset; } return true; diff --git a/rpcs3/Loader/Loader.cpp b/rpcs3/Loader/Loader.cpp index 5725ef0dd2..86f8ebd326 100644 --- a/rpcs3/Loader/Loader.cpp +++ b/rpcs3/Loader/Loader.cpp @@ -147,7 +147,6 @@ bool Loader::Analyze() machine = m_loader->GetMachine(); entry = m_loader->GetMachine() == MACHINE_SPU ? m_loader->GetEntry() + g_spu_offset : m_loader->GetEntry(); - _text_section_offset = m_loader->GetTextEntry(); return true; } diff --git a/rpcs3/Loader/Loader.h b/rpcs3/Loader/Loader.h index ee04d50911..ab5c974c20 100644 --- a/rpcs3/Loader/Loader.h +++ b/rpcs3/Loader/Loader.h @@ -181,14 +181,12 @@ protected: u32 min_addr; u32 max_addr; Elf_Machine machine; - u32 _text_section_offset; LoaderBase() : machine(MACHINE_Unknown) , entry(0) , min_addr(0) , max_addr(0) - , _text_section_offset(0) { } @@ -198,7 +196,6 @@ public: Elf_Machine GetMachine() { return machine; } u32 GetEntry() { return entry; } - u32 GetTextEntry() { return _text_section_offset; } u32 GetMinAddr() { return min_addr; } u32 GetMaxAddr() { return min_addr; } }; From ae39d3802f2193479c6878a471a6c0c74097d9db Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sun, 1 Dec 2013 04:36:55 +0400 Subject: [PATCH 5/9] SPU Fixes 2 It's almost finished --- rpcs3/Emu/Cell/SPUInterpreter.h | 494 ++++++++++++++++---------------- rpcs3/Emu/Cell/SPUThread.h | 1 + 2 files changed, 247 insertions(+), 248 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index e61416980f..07cb71baf6 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -278,22 +278,22 @@ private: void BIZ(u32 rt, u32 ra) { if(CPU.GPR[rt]._u32[3] == 0) - CPU.SetBranch(CPU.GPR[ra]._u32[3] & 0xfffffffc); + CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); } void BINZ(u32 rt, u32 ra) { if(CPU.GPR[rt]._u32[3] != 0) - CPU.SetBranch(CPU.GPR[ra]._u32[3] & 0xfffffffc); + CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); } void BIHZ(u32 rt, u32 ra) { if(CPU.GPR[rt]._u16[7] == 0) - CPU.SetBranch(CPU.GPR[ra]._u32[3] & 0xfffffffc); + CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); } void BIHNZ(u32 rt, u32 ra) { if(CPU.GPR[rt]._u16[7] != 0) - CPU.SetBranch(CPU.GPR[ra]._u32[3] & 0xfffffffc); + CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); } void STOPD(u32 rc, u32 ra, u32 rb) { @@ -301,7 +301,7 @@ private: } void STQX(u32 rt, u32 ra, u32 rb) { - u32 lsa = CPU.GPR[ra]._u32[3] + CPU.GPR[rb]._u32[3]; + u32 lsa = (CPU.GPR[ra]._u32[3] + CPU.GPR[rb]._u32[3]) & 0xFFFFFFF0; if(!CPU.IsGoodLSA(lsa)) { ConLog.Error("STQX: bad lsa (0x%x)", lsa); @@ -313,7 +313,7 @@ private: } void BI(u32 ra) { - CPU.SetBranch(CPU.GPR[ra]._u32[3] & 0xfffffffc); + CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); } void BISL(u32 rt, u32 ra) { @@ -332,12 +332,11 @@ private: } void HBR(u32 p, u32 ro, u32 ra) { - CPU.SetBranch(CPU.GPR[ra]._u32[0]); } void GB(u32 rt, u32 ra) { CPU.GPR[rt].Reset(); - CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] & 1) | + CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[0] & 1) | ((CPU.GPR[ra]._u32[1] & 1) << 1) | ((CPU.GPR[ra]._u32[2] & 1) << 2) | ((CPU.GPR[ra]._u32[3] & 1) << 3); @@ -347,41 +346,47 @@ private: CPU.GPR[rt].Reset(); for (int h = 0; h < 8; h++) - CPU.GPR[rt]._u32[0] |= (CPU.GPR[ra]._u16[h] & 1) << h; + CPU.GPR[rt]._u32[3] |= (CPU.GPR[ra]._u16[h] & 1) << h; } void GBB(u32 rt, u32 ra) { CPU.GPR[rt].Reset(); for (int b = 0; b < 16; b++) - CPU.GPR[rt]._u32[0] |= (CPU.GPR[ra]._u8[b] & 1) << b; + CPU.GPR[rt]._u32[3] |= (CPU.GPR[ra]._u8[b] & 1) << b; } void FSM(u32 rt, u32 ra) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._u32[w] = (CPU.GPR[ra]._u32[0] & (8 >> w)) ? ~0 : 0; + CPU.GPR[rt]._u32[w] = (CPU.GPR[ra]._u32[3] & (1 << w)) ? ~0 : 0; } void FSMH(u32 rt, u32 ra) { for (int h = 0; h < 8; h++) - CPU.GPR[rt]._u16[h] = (CPU.GPR[ra]._u32[0] & (128 >> h)) ? ~0 : 0; + CPU.GPR[rt]._u16[h] = (CPU.GPR[ra]._u32[3] & (1 << h)) ? ~0 : 0; } void FSMB(u32 rt, u32 ra) { for (int b = 0; b < 16; b++) - CPU.GPR[rt]._u8[b] = (CPU.GPR[ra]._u32[0] & (32768 >> b)) ? ~0 : 0; + CPU.GPR[rt]._u8[b] = (CPU.GPR[ra]._u32[3] & (1 << b)) ? ~0 : 0; } void FREST(u32 rt, u32 ra) { - UNIMPLEMENTED(); + //(SSE) RCPPS - Compute Reciprocals of Packed Single-Precision Floating-Point Values + //rt = approximate(1/ra) + CPU.GPR[rt]._m128 = _mm_rcp_ps(CPU.GPR[ra]._m128); } void FRSQEST(u32 rt, u32 ra) { - UNIMPLEMENTED(); + //(SSE) RSQRTPS - Compute Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values + //rt = approximate(1/sqrt(abs(ra))) + //abs(ra) === ra & FloatAbsMask + const __m128 FloatAbsMask = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; + CPU.GPR[rt]._m128 = _mm_rsqrt_ps(_mm_and_ps(CPU.GPR[ra]._m128, FloatAbsMask)); } void LQX(u32 rt, u32 ra, u32 rb) { - u32 lsa = CPU.GPR[ra]._u32[3] + CPU.GPR[rb]._u32[3]; + u32 lsa = (CPU.GPR[ra]._u32[3] + CPU.GPR[rb]._u32[3]) & 0xFFFFFFF0; if(!CPU.IsGoodLSA(lsa)) { ConLog.Error("LQX: bad lsa (0x%x)", lsa); @@ -393,238 +398,195 @@ private: } void ROTQBYBI(u32 rt, u32 ra, u32 rb) { - const int s = (CPU.GPR[rb]._u32[0] >> 3) & 0xf; + const int s = (CPU.GPR[rb]._u32[3] >> 3) & 0xf; - for (int b = 0; b < 8; b++) - { - if(b + s < 16) - { - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b + s]; - } - else - { - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b + s - 16]; - } - } + for (int b = 0; b < 16; b++) + CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[(b - s) & 0xf]; } void ROTQMBYBI(u32 rt, u32 ra, u32 rb) { - const int nShift = ((0 - CPU.GPR[rb]._u32[0]) >> 3) & 0x1f; + const int s = (0 - (CPU.GPR[rb]._u32[3] >> 3)) & 0x1f; - for (int b = 0; b < 16; b++) - { - if (b >= nShift) - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b - nShift]; - else - CPU.GPR[rt]._u8[b] = 0; - } + CPU.GPR[rt].Reset(); + for (int b = 0; b < 16 - s; b++) + CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b + s]; } void SHLQBYBI(u32 rt, u32 ra, u32 rb) { - const int nShift = (CPU.GPR[rb]._u32[0] >> 3) & 0x1f; + const int s = (CPU.GPR[rb]._u32[3] >> 3) & 0x1f; - for (int b = 0; b < 16; b++) - { - if ((b + nShift) < 16) - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b + nShift]; - else - CPU.GPR[rt]._u8[b] = 0; - } + CPU.GPR[rt].Reset(); + for (int b = s; b < 16; b++) + CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b - s]; } void CBX(u32 rt, u32 ra, u32 rb) { - int n = (CPU.GPR[rb]._u32[0] + CPU.GPR[ra]._u32[0]) & 0xf; + const u32 t = (CPU.GPR[rb]._u32[3] + CPU.GPR[ra]._u32[3]) & 0xF; - for (int b = 0; b < 16; b++) - CPU.GPR[rt]._u8[b] = b == n ? 3 : b | 0x10; + CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; + CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; + CPU.GPR[rt]._u8[15 - t] = 0x03; } void CHX(u32 rt, u32 ra, u32 rb) { - int n = ((CPU.GPR[rb]._u32[0] + CPU.GPR[ra]._u32[0]) & 0xf) >> 1; + const u32 t = (CPU.GPR[rb]._u32[3] + CPU.GPR[ra]._u32[3]) & 0xE; - for (int h = 0; h < 8; h++) - CPU.GPR[rt]._u16[h] = h == n ? 0x0203 : (h * 2 * 0x0101 + 0x1011); + CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; + CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; + CPU.GPR[rt]._u16[7 - (t >> 1)] = 0x0203; } void CWX(u32 rt, u32 ra, u32 rb) { - const u32 t = ((CPU.GPR[ra]._u32[0] + CPU.GPR[rb]._u32[0]) & 0xc) / 4; - for(u32 i=0; i<16; ++i) CPU.GPR[rt]._i8[i] = 0x10 + i; - CPU.GPR[rt]._u32[t] = 0x10203; + const u32 t = (CPU.GPR[ra]._u32[3] + CPU.GPR[rb]._u32[3]) & 0xC; + + CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; + CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; + CPU.GPR[rt]._u32[3 - (t >> 2)] = 0x00010203; } void CDX(u32 rt, u32 ra, u32 rb) { - int n = ((CPU.GPR[rb]._u32[0] + CPU.GPR[ra]._u32[0]) & 0x8) >> 2; + const u32 t = (CPU.GPR[rb]._u32[3] + CPU.GPR[ra]._u32[3]) & 0x8; - for (int w = 0; w < 4; w++) - CPU.GPR[rt]._u32[w] = (w == n) ? 0x00010203 : (w == (n + 1)) ? 0x04050607 : (0x01010101 * (w * 4) + 0x10111213); + CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; + CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; + CPU.GPR[rt]._u64[1 - (t >> 3)] = (u64)0x0001020304050607; } void ROTQBI(u32 rt, u32 ra, u32 rb) { - int nShift = CPU.GPR[rb]._u32[0] & 0x7; + const int t = CPU.GPR[rb]._u32[3] & 0x7; - CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] << nShift) | (CPU.GPR[ra]._u32[1] >> (32 - nShift)); - CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] << nShift) | (CPU.GPR[ra]._u32[2] >> (32 - nShift)); - CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] << nShift) | (CPU.GPR[ra]._u32[3] >> (32 - nShift)); - CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] << nShift) | (CPU.GPR[ra]._u32[0] >> (32 - nShift)); + CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] << t) | (CPU.GPR[ra]._u32[3] >> (32 - t)); + CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] << t) | (CPU.GPR[ra]._u32[0] >> (32 - t)); + CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] << t) | (CPU.GPR[ra]._u32[1] >> (32 - t)); + CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] << t) | (CPU.GPR[ra]._u32[2] >> (32 - t)); } void ROTQMBI(u32 rt, u32 ra, u32 rb) { - int nShift = (0 - CPU.GPR[rb]._u32[0]) % 8; + const int t = (0 - CPU.GPR[rb]._u32[3]) & 0x7; - CPU.GPR[rt]._u32[0] = CPU.GPR[ra]._u32[0] >> nShift; - CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] >> nShift) | (CPU.GPR[ra]._u32[0] << (32 - nShift)); - CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] >> nShift) | (CPU.GPR[ra]._u32[1] << (32 - nShift)); - CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] >> nShift) | (CPU.GPR[ra]._u32[2] << (32 - nShift)); + CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] >> t) | (CPU.GPR[ra]._u32[1] << (32 - t)); + CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] >> t) | (CPU.GPR[ra]._u32[2] << (32 - t)); + CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] >> t) | (CPU.GPR[ra]._u32[3] << (32 - t)); + CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] >> t); } void SHLQBI(u32 rt, u32 ra, u32 rb) { - const int nShift = CPU.GPR[rb]._u32[0] & 0x7; + const int t = CPU.GPR[rb]._u32[3] & 0x7; - CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] << nShift) | (CPU.GPR[ra]._u32[1] >> (32 - nShift)); - CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] << nShift) | (CPU.GPR[ra]._u32[2] >> (32 - nShift)); - CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] << nShift) | (CPU.GPR[ra]._u32[3] >> (32 - nShift)); - CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._u32[3] << nShift; + CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] << t); + CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] << t) | (CPU.GPR[ra]._u32[0] >> (32 - t)); + CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] << t) | (CPU.GPR[ra]._u32[1] >> (32 - t)); + CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] << t) | (CPU.GPR[ra]._u32[2] >> (32 - t)); } void ROTQBY(u32 rt, u32 ra, u32 rb) { - const s32 s = CPU.GPR[rb]._u8[0] & 0xf; + const int s = CPU.GPR[rb]._u32[3] & 0xf; - for(u32 b = 0; b < 16; ++b) - { - if(b + s < 16) - { - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b + s]; - } - else - { - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b + s - 16]; - } - } + for (int b = 0; b < 16; ++b) + CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[(b - s) & 0xf]; } void ROTQMBY(u32 rt, u32 ra, u32 rb) { - const int nShift = (0 - CPU.GPR[rb]._u32[0]) % 32; + const int s = (0 - CPU.GPR[rb]._u32[3]) & 0x1f; - for (int b = 0; b < 16; b++) - if (b >= nShift) - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b - nShift]; - else - CPU.GPR[rt]._u8[b] = 0; + CPU.GPR[rt].Reset(); + for (int b = 0; b < 16 - s; b++) + CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b + s]; } void SHLQBY(u32 rt, u32 ra, u32 rb) { - const int nShift = CPU.GPR[rb]._u32[0] & 0x1f; + const int s = CPU.GPR[rb]._u32[3] & 0x1f; - for (int b = 0; b < 16; b++) - if (b + nShift < 16) - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b + nShift]; - else - CPU.GPR[rt]._u8[b] = 0; + CPU.GPR[rt].Reset(); + for (int b = s; b < 16; b++) + CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b - s]; } void ORX(u32 rt, u32 ra) { CPU.GPR[rt].Reset(); - - CPU.GPR[rt]._u32[0] = CPU.GPR[ra]._u32[0] | CPU.GPR[ra]._u32[1] | CPU.GPR[ra]._u32[2] | CPU.GPR[ra]._u32[3]; + CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._u32[0] | CPU.GPR[ra]._u32[1] | CPU.GPR[ra]._u32[2] | CPU.GPR[ra]._u32[3]; } void CBD(u32 rt, u32 ra, s32 i7) { - const int n = (CPU.GPR[ra]._u32[0] + i7) & 0xf; - - for (int b = 0; b < 16; b++) - if (b == n) - CPU.GPR[rt]._u8[b] = 0x3; - else - CPU.GPR[rt]._u8[b] = b | 0x10; - } - void CHD(u32 rt, u32 ra, s32 i7) - { - int n = ((CPU.GPR[ra]._u32[0] + i7) & 0xf) >> 1; - - for (int h = 0; h < 8; h++) - CPU.GPR[rt]._u16[h] = h == n ? 0x0203 : (h * 2 * 0x0101 + 0x1011); - } - void CWD(u32 rt, u32 ra, s32 i7) - { - const int t = ((CPU.GPR[ra]._u32[0] + i7) & 0xf) >> 2; - - for (int i=0; i<16; ++i) - CPU.GPR[rt]._u8[i] = 0x10 + i; - - CPU.GPR[rt]._u32[t] = 0x10203; - } - void CDD(u32 rt, u32 ra, s32 i7) - { - const int t = (((CPU.GPR[ra]._u32[0] + i7) & 0xf) >> 3) ^ 1; + const int t = (CPU.GPR[ra]._u32[3] + i7) & 0xF; CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; + CPU.GPR[rt]._u8[15 - t] = 0x03; + } + void CHD(u32 rt, u32 ra, s32 i7) + { + const int t = (CPU.GPR[ra]._u32[3] + i7) & 0xE; - CPU.GPR[rt]._u64[t] = (u64)0x0001020304050607; + CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; + CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; + CPU.GPR[rt]._u16[7 - (t >> 1)] = 0x0203; + } + void CWD(u32 rt, u32 ra, s32 i7) + { + const int t = (CPU.GPR[ra]._u32[3] + i7) & 0xC; + + CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; + CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; + CPU.GPR[rt]._u32[3 - (t >> 2)] = 0x00010203; + } + void CDD(u32 rt, u32 ra, s32 i7) + { + const int t = (CPU.GPR[ra]._u32[3] + i7) & 0x8; + + CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; + CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; + CPU.GPR[rt]._u64[1 - (t >> 3)] = (u64)0x0001020304050607; } void ROTQBII(u32 rt, u32 ra, s32 i7) { - int nShift = i7 & 0x7; + const int s = i7 & 0x7; - CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] << nShift) | (CPU.GPR[ra]._u32[1] >> (32 - nShift)); - CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] << nShift) | (CPU.GPR[ra]._u32[2] >> (32 - nShift)); - CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] << nShift) | (CPU.GPR[ra]._u32[3] >> (32 - nShift)); - CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] << nShift) | (CPU.GPR[ra]._u32[0] >> (32 - nShift)); + CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] << s) | (CPU.GPR[ra]._u32[3] >> (32 - s)); + CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] << s) | (CPU.GPR[ra]._u32[0] >> (32 - s)); + CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] << s) | (CPU.GPR[ra]._u32[1] >> (32 - s)); + CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] << s) | (CPU.GPR[ra]._u32[2] >> (32 - s)); } void ROTQMBII(u32 rt, u32 ra, s32 i7) { - int nShift = (0 - i7) % 8; + const int s = (0 - i7) & 0x7; - CPU.GPR[rt]._u32[0] = CPU.GPR[ra]._u32[0] >> nShift; - CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] >> nShift) | (CPU.GPR[ra]._u32[0] << (32 - nShift)); - CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] >> nShift) | (CPU.GPR[ra]._u32[1] << (32 - nShift)); - CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] >> nShift) | (CPU.GPR[ra]._u32[2] << (32 - nShift)); + CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] >> s) | (CPU.GPR[ra]._u32[1] << (32 - s)); + CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] >> s) | (CPU.GPR[ra]._u32[2] << (32 - s)); + CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] >> s) | (CPU.GPR[ra]._u32[3] << (32 - s)); + CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] >> s); } void SHLQBII(u32 rt, u32 ra, s32 i7) { - const int nShift = i7 & 0x7; + const int s = i7 & 0x7; - CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] << nShift) | (CPU.GPR[ra]._u32[1] >> (32 - nShift)); - CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] << nShift) | (CPU.GPR[ra]._u32[2] >> (32 - nShift)); - CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] << nShift) | (CPU.GPR[ra]._u32[3] >> (32 - nShift)); - CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._u32[3] << nShift; + CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] << s); + CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] << s) | (CPU.GPR[ra]._u32[0] >> (32 - s)); + CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] << s) | (CPU.GPR[ra]._u32[1] >> (32 - s)); + CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] << s) | (CPU.GPR[ra]._u32[2] >> (32 - s)); } void ROTQBYI(u32 rt, u32 ra, s32 i7) { - const u16 s = i7 & 0xf; + const int s = i7 & 0xf; - for(u32 b = 0; b < 16; ++b) - { - if(b + s < 16) - { - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b + s]; - } - else - { - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b + s - 16]; - } - } + for (int b = 0; b < 16; b++) + CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[(b - s) & 0xf]; } void ROTQMBYI(u32 rt, u32 ra, s32 i7) { - const int nShift = (0 - i7) % 32; + const int s = (0 - i7) & 0x1f; - for (int b = 0; b < 16; b++) - if (b >= nShift) - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b - nShift]; - else - CPU.GPR[rt]._u8[b] = 0; + CPU.GPR[rt].Reset(); + for (int b = 0; b < 16 - s; b++) + CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b + s]; } void SHLQBYI(u32 rt, u32 ra, s32 i7) { - const u16 s = i7 & 0x1f; + const int s = i7 & 0x1f; CPU.GPR[rt].Reset(); - - for(u32 b = 0; b + s < 16; ++b) - { - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b + s]; - } + for (int b = s; b < 16; b++) + CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b - s]; } void NOP(u32 rt) { @@ -647,7 +609,7 @@ private: void EQV(u32 rt, u32 ra, u32 rb) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._u32[w] = (CPU.GPR[ra]._u32[w] & CPU.GPR[rb]._u32[w]) | ~(CPU.GPR[ra]._u32[w] | CPU.GPR[rb]._u32[w]); + CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u32[w] ^ (~CPU.GPR[rb]._u32[w]); } void CGTB(u32 rt, u32 ra, u32 rb) { @@ -665,10 +627,7 @@ private: //HGT uses signed values. HLGT uses unsigned values void HGT(u32 rt, s32 ra, s32 rb) { - if(CPU.GPR[ra]._i32[0] > CPU.GPR[rb]._i32[0]) - { - CPU.Stop(); - } + if(CPU.GPR[ra]._i32[3] > CPU.GPR[rb]._i32[3]) CPU.Stop(); } void CLZ(u32 rt, u32 ra) { @@ -685,13 +644,13 @@ private: } void XSWD(u32 rt, u32 ra) { - CPU.GPR[rt]._i64[0] = (s64)CPU.GPR[ra]._i32[1]; - CPU.GPR[rt]._i64[1] = (s64)CPU.GPR[ra]._i32[3]; + CPU.GPR[rt]._i64[0] = (s64)CPU.GPR[ra]._i32[0]; + CPU.GPR[rt]._i64[1] = (s64)CPU.GPR[ra]._i32[2]; } void XSHW(u32 rt, u32 ra) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._i32[w] = (s32)CPU.GPR[ra]._i16[w*2 + 1]; + CPU.GPR[rt]._i32[w] = (s32)CPU.GPR[ra]._i16[w*2]; } void CNTB(u32 rt, u32 ra) { @@ -704,7 +663,7 @@ private: void XSBH(u32 rt, u32 ra) { for (int h = 0; h < 8; h++) - CPU.GPR[rt]._i16[h] = (s16)CPU.GPR[ra]._i8[h*2 + 1]; + CPU.GPR[rt]._i16[h] = (s16)CPU.GPR[ra]._i8[h*2]; } void CLGT(u32 rt, u32 ra, u32 rb) { @@ -795,10 +754,7 @@ private: } void HLGT(u32 rt, u32 ra, u32 rb) { - if(CPU.GPR[ra]._u32[0] > CPU.GPR[rb]._u32[0]) - { - CPU.Stop(); - } + if(CPU.GPR[ra]._u32[3] > CPU.GPR[rb]._u32[3]) CPU.Stop(); } void DFMA(u32 rt, u32 ra, u32 rb) { @@ -817,8 +773,8 @@ private: } void DFNMA(u32 rt, u32 ra, u32 rb) { - CPU.GPR[rt]._d[0] = - CPU.GPR[ra]._d[0] * CPU.GPR[rb]._d[0] - CPU.GPR[rt]._d[0] ; - CPU.GPR[rt]._d[1] = - CPU.GPR[ra]._d[1] * CPU.GPR[rb]._d[1] - CPU.GPR[rt]._d[1] ; + CPU.GPR[rt]._d[0] = -(CPU.GPR[ra]._d[0] * CPU.GPR[rb]._d[0] + CPU.GPR[rt]._d[0]); + CPU.GPR[rt]._d[1] = -(CPU.GPR[ra]._d[1] * CPU.GPR[rb]._d[1] + CPU.GPR[rt]._d[1]); } void CEQ(u32 rt, u32 ra, u32 rb) { @@ -828,7 +784,7 @@ private: void MPYHHU(u32 rt, u32 ra, u32 rb) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u16[w*2] * CPU.GPR[rb]._u16[w*2]; + CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u16[w*2+1] * CPU.GPR[rb]._u16[w*2+1]; } void ADDX(u32 rt, u32 ra, u32 rb) { @@ -843,7 +799,7 @@ private: void CGX(u32 rt, u32 ra, u32 rb) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._u32[w] = (CPU.GPR[ra]._u32[w] + CPU.GPR[rb]._u32[w] + (CPU.GPR[rt]._u32[w] & 1)) < CPU.GPR[ra]._u32[w] ? 1 : 0; + CPU.GPR[rt]._u32[w] = ((u64)CPU.GPR[ra]._u32[w] + (u64)CPU.GPR[rb]._u32[w] + (u64)(CPU.GPR[rt]._u32[w] & 1)) >> 32; } void BGX(u32 rt, u32 ra, u32 rb) { @@ -851,19 +807,19 @@ private: for (int w = 0; w < 4; w++) { - nResult = (u64)CPU.GPR[rb]._u32[w] - (u64)CPU.GPR[ra]._u32[w] - (1 - (CPU.GPR[rt]._u32[w] & 1)); + nResult = (u64)CPU.GPR[rb]._u32[w] - (u64)CPU.GPR[ra]._u32[w] - (u64)(1 - (CPU.GPR[rt]._u32[w] & 1)); CPU.GPR[rt]._u32[w] = nResult < 0 ? 0 : 1; } } void MPYHHA(u32 rt, u32 ra, u32 rb) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._i32[w] += CPU.GPR[ra]._i16[w*2] * CPU.GPR[rb]._i16[w*2]; + CPU.GPR[rt]._i32[w] += CPU.GPR[ra]._i16[w*2+1] * CPU.GPR[rb]._i16[w*2+1]; } void MPYHHAU(u32 rt, u32 ra, u32 rb) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._u32[w] += CPU.GPR[ra]._u16[w*2] * CPU.GPR[rb]._u16[w*2]; + CPU.GPR[rt]._u32[w] += CPU.GPR[ra]._u16[w*2+1] * CPU.GPR[rb]._u16[w*2+1]; } //Forced bits to 0, hence the shift: @@ -876,15 +832,15 @@ private: } void FESD(u32 rt, u32 ra) { - CPU.GPR[rt]._d[0] = (double)CPU.GPR[ra]._f[0]; - CPU.GPR[rt]._d[1] = (double)CPU.GPR[ra]._f[2]; + CPU.GPR[rt]._d[0] = (double)CPU.GPR[ra]._f[1]; + CPU.GPR[rt]._d[1] = (double)CPU.GPR[ra]._f[3]; } void FRDS(u32 rt, u32 ra) { - CPU.GPR[rt]._f[0] = (float)CPU.GPR[ra]._d[0]; - CPU.GPR[rt]._f[1] = 0x00000000; - CPU.GPR[rt]._f[2] = (float)CPU.GPR[ra]._d[1]; - CPU.GPR[rt]._f[3] = 0x00000000; + CPU.GPR[rt]._f[1] = (float)CPU.GPR[ra]._d[0]; + CPU.GPR[rt]._u32[0] = 0x00000000; + CPU.GPR[rt]._f[3] = (float)CPU.GPR[ra]._d[1]; + CPU.GPR[rt]._u32[1] = 0x00000000; } void FSCRWR(u32 rt, u32 ra) { @@ -892,7 +848,55 @@ private: } void DFTSV(u32 rt, u32 ra, s32 i7) { - UNIMPLEMENTED(); + const u64 DoubleExpMask = 0x7ff0000000000000; + const u64 DoubleFracMask = 0x000fffffffffffff; + const u64 DoubleSignMask = 0x8000000000000000; + CPU.GPR[rt].Reset(); + if (i7 & 1) //Negative Denorm Check (-, exp is zero, frac is non-zero) + for (int i = 0; i < 2; i++) + { + if (CPU.GPR[ra]._u64[i] & DoubleFracMask) + if (CPU.GPR[ra]._u64[i] & DoubleSignMask & DoubleExpMask == DoubleSignMask) + CPU.GPR[rt]._u64[i] = 0xffffffffffffffff; + } + if (i7 & 2) //Positive Denorm Check (+, exp is zero, frac is non-zero) + for (int i = 0; i < 2; i++) + { + if (CPU.GPR[ra]._u64[i] & DoubleFracMask) + if (CPU.GPR[ra]._u64[i] & DoubleSignMask & DoubleExpMask == 0) + CPU.GPR[rt]._u64[i] = 0xffffffffffffffff; + } + if (i7 & 4) //Negative Zero Check (-, exp is zero, frac is zero) + for (int i = 0; i < 2; i++) + { + if (CPU.GPR[ra]._u64[i] == DoubleSignMask) + CPU.GPR[rt]._u64[i] = 0xffffffffffffffff; + } + if (i7 & 8) //Positive Zero Check (+, exp is zero, frac is zero) + for (int i = 0; i < 2; i++) + { + if (CPU.GPR[ra]._u64[i] == 0) + CPU.GPR[rt]._u64[i] = 0xffffffffffffffff; + } + if (i7 & 16) //Negative Infinity Check (-, exp is 0x7ff, frac is zero) + for (int i = 0; i < 2; i++) + { + if (CPU.GPR[ra]._u64[i] == DoubleSignMask & DoubleExpMask) + CPU.GPR[rt]._u64[i] = 0xffffffffffffffff; + } + if (i7 & 32) //Positive Infinity Check (+, exp is 0x7ff, frac is zero) + for (int i = 0; i < 2; i++) + { + if (CPU.GPR[ra]._u64[i] == DoubleExpMask) + CPU.GPR[rt]._u64[i] = 0xffffffffffffffff; + } + if (i7 & 64) //Not-a-Number Check (any sign, exp is 0x7ff, frac is non-zero) + for (int i = 0; i < 2; i++) + { + if (CPU.GPR[ra]._u64[i] & DoubleFracMask) + if (CPU.GPR[ra]._u64[i] & DoubleExpMask == DoubleExpMask) + CPU.GPR[rt]._u64[i] = 0xffffffffffffffff; + } } void FCEQ(u32 rt, u32 ra, u32 rb) { @@ -909,22 +913,22 @@ private: void MPY(u32 rt, u32 ra, u32 rb) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i16[w*2 + 1] * CPU.GPR[rb]._i16[w*2 + 1]; + CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i16[w*2] * CPU.GPR[rb]._i16[w*2]; } void MPYH(u32 rt, u32 ra, u32 rb) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._i32[w] = ((CPU.GPR[ra]._i32[w] >> 16) * (CPU.GPR[rb]._i32[w] & 0xffff)) << 16; + CPU.GPR[rt]._i32[w] = (CPU.GPR[ra]._i16[w*2+1] * CPU.GPR[rb]._i16[w*2]) << 16; } void MPYHH(u32 rt, u32 ra, u32 rb) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i16[w*2] * CPU.GPR[rb]._i16[w*2]; + CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i16[w*2+1] * CPU.GPR[rb]._i16[w*2+1]; } void MPYS(u32 rt, u32 ra, u32 rb) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._i32[w] = (CPU.GPR[ra]._i16[w*2 + 1] * CPU.GPR[rb]._i16[w*2 + 1]) >> 16; + CPU.GPR[rt]._i32[w] = (CPU.GPR[ra]._i16[w*2] * CPU.GPR[rb]._i16[w*2]) >> 16; } void CEQH(u32 rt, u32 ra, u32 rb) { @@ -959,7 +963,7 @@ private: } void HEQ(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + if(CPU.GPR[ra]._i32[3] == CPU.GPR[rb]._i32[3]) CPU.Stop(); } //0 - 9 @@ -983,11 +987,12 @@ private: //0 - 8 void BRZ(u32 rt, s32 i16) { - if(!CPU.GPR[rt]._u32[3]) CPU.SetBranch(branchTarget(CPU.PC, i16)); + if (CPU.GPR[rt]._u32[3] == 0) + CPU.SetBranch(branchTarget(CPU.PC, i16)); } void STQA(u32 rt, s32 i16) { - u32 lsa = i16 << 2; + u32 lsa = (i16 << 2) & 0xFFFFFFF0; if(!CPU.IsGoodLSA(lsa)) { ConLog.Error("STQA: bad lsa (0x%x)", lsa); @@ -999,20 +1004,22 @@ private: } void BRNZ(u32 rt, s32 i16) { - if(CPU.GPR[rt]._u32[3] != 0) + if (CPU.GPR[rt]._u32[3] != 0) CPU.SetBranch(branchTarget(CPU.PC, i16)); } void BRHZ(u32 rt, s32 i16) { - if(!CPU.GPR[rt]._u16[7]) CPU.SetBranch(branchTarget(CPU.PC, i16)); + if (CPU.GPR[rt]._u16[7] == 0) + CPU.SetBranch(branchTarget(CPU.PC, i16)); } void BRHNZ(u32 rt, s32 i16) { - if(CPU.GPR[rt]._u16[7]) CPU.SetBranch(branchTarget(CPU.PC, i16)); + if (CPU.GPR[rt]._u16[7] != 0) + CPU.SetBranch(branchTarget(CPU.PC, i16)); } void STQR(u32 rt, s32 i16) { - u32 lsa = branchTarget(CPU.PC, i16); + u32 lsa = branchTarget(CPU.PC, (i16 << 2) & 0xFFFFFFF0); if(!CPU.IsGoodLSA(lsa)) { ConLog.Error("STQR: bad lsa (0x%x)", lsa); @@ -1024,11 +1031,11 @@ private: } void BRA(s32 i16) { - CPU.SetBranch(i16 << 2); + CPU.SetBranch(branchTarget(0, i16)); } void LQA(u32 rt, s32 i16) { - u32 lsa = i16 << 2; + u32 lsa = (i16 << 2) & 0xFFFFFFF0; if(!CPU.IsGoodLSA(lsa)) { ConLog.Error("LQA: bad lsa (0x%x)", lsa); @@ -1040,9 +1047,9 @@ private: } void BRASL(u32 rt, s32 i16) { + CPU.SetBranch(branchTarget(0, i16)); CPU.GPR[rt].Reset(); - CPU.GPR[rt]._u32[0] = CPU.PC + 4; - CPU.SetBranch(i16 << 2); + CPU.GPR[rt]._u32[3] = CPU.PC + 4; } void BR(s32 i16) { @@ -1066,9 +1073,9 @@ private: } void BRSL(u32 rt, s32 i16) { + CPU.SetBranch(branchTarget(CPU.PC, i16)); CPU.GPR[rt].Reset(); CPU.GPR[rt]._u32[3] = CPU.PC + 4; - CPU.SetBranch(branchTarget(CPU.PC, i16)); } void LQR(u32 rt, s32 i16) { @@ -1084,25 +1091,25 @@ private: } void IL(u32 rt, s32 i16) { - CPU.GPR[rt]._u32[0] = i16; - CPU.GPR[rt]._u32[1] = i16; - CPU.GPR[rt]._u32[2] = i16; - CPU.GPR[rt]._u32[3] = i16; + CPU.GPR[rt]._i32[0] = + CPU.GPR[rt]._i32[1] = + CPU.GPR[rt]._i32[2] = + CPU.GPR[rt]._i32[3] = i16; } void ILHU(u32 rt, s32 i16) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._u16[w*2 + 1] = i16; + CPU.GPR[rt]._i32[w] = i16 << 16; } void ILH(u32 rt, s32 i16) { for (int h = 0; h < 8; h++) - CPU.GPR[rt]._u16[h] = i16; + CPU.GPR[rt]._i16[h] = i16; } void IOHL(u32 rt, s32 i16) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._u32[w] |= i16; + CPU.GPR[rt]._i32[w] |= (i16 & 0xFFFF); } @@ -1110,19 +1117,17 @@ private: void ORI(u32 rt, u32 ra, s32 i10) { for(u32 i = 0; i < 4; ++i) - { - CPU.GPR[rt]._u32[i] = CPU.GPR[ra]._u32[i] | i10; - } + CPU.GPR[rt]._i32[i] = CPU.GPR[ra]._i32[i] | i10; } void ORHI(u32 rt, u32 ra, s32 i10) { for (int h = 0; h < 8; h++) - CPU.GPR[rt]._u16[h] = CPU.GPR[ra]._u16[h] | i10; + CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] | i10; } void ORBI(u32 rt, u32 ra, s32 i10) { for (int b = 0; b < 16; b++) - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b] | (i10 & 0xff); + CPU.GPR[rt]._i8[b] = CPU.GPR[ra]._i8[b] | i10; } void SFI(u32 rt, u32 ra, s32 i10) { @@ -1137,31 +1142,27 @@ private: void ANDI(u32 rt, u32 ra, s32 i10) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u32[w] & i10; + CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i32[w] & i10; } void ANDHI(u32 rt, u32 ra, s32 i10) { for (int h = 0; h < 8; h++) - CPU.GPR[rt]._u16[h] = CPU.GPR[ra]._u16[h] & i10; + CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] & i10; } void ANDBI(u32 rt, u32 ra, s32 i10) { for (int b = 0; b < 16; b++) - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b] & (i10 & 0xff); + CPU.GPR[rt]._i8[b] = CPU.GPR[ra]._i8[b] & i10; } void AI(u32 rt, u32 ra, s32 i10) { for(u32 i = 0; i < 4; ++i) - { - CPU.GPR[rt]._u32[i] = CPU.GPR[ra]._u32[i] + i10; - } + CPU.GPR[rt]._i32[i] = CPU.GPR[ra]._i32[i] + i10; } void AHI(u32 rt, u32 ra, s32 i10) { - for(u32 i = 0; i < 8; ++i) - { - CPU.GPR[rt]._u16[i] = CPU.GPR[ra]._u16[i] + i10; - } + for(u32 h = 0; h < 8; ++h) + CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] + i10; } void STQD(u32 rt, s32 i10, u32 ra) //hello_world addr=0x178, value won't be saved { @@ -1189,17 +1190,17 @@ private: void XORI(u32 rt, u32 ra, s32 i10) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u32[w] ^ i10; + CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i32[w] ^ i10; } void XORHI(u32 rt, u32 ra, s32 i10) { for (int h = 0; h < 8; h++) - CPU.GPR[rt]._u16[h] = CPU.GPR[ra]._u16[h] ^ i10; + CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] ^ i10; } void XORBI(u32 rt, u32 ra, s32 i10) { for (int b = 0; b < 16; b++) - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b] ^ (i10 & 0xff); + CPU.GPR[rt]._i8[b] = CPU.GPR[ra]._i8[b] ^ i10; } void CGTI(u32 rt, u32 ra, s32 i10) { @@ -1218,7 +1219,7 @@ private: } void HGTI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + if(CPU.GPR[ra]._i32[3] > i10) CPU.Stop(); } void CLGTI(u32 rt, u32 ra, s32 i10) { @@ -1241,55 +1242,52 @@ private: } void HLGTI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + if(CPU.GPR[ra]._u32[3] > (u32)i10) CPU.Stop(); } void MPYI(u32 rt, u32 ra, s32 i10) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i16[w*2 + 1] * i10; + CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i16[w*2] * i10; } void MPYUI(u32 rt, u32 ra, s32 i10) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u16[w*2 + 1] * (u16)(i10 & 0xffff); + CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u16[w*2] * (u16)(i10 & 0xffff); } void CEQI(u32 rt, u32 ra, s32 i10) { for(u32 i = 0; i < 4; ++i) - { - CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._u32[i] == (u32)i10) ? 0xffffffff : 0x00000000; - } + CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._i32[i] == i10) ? 0xffffffff : 0x00000000; } void CEQHI(u32 rt, u32 ra, s32 i10) { for (int h = 0; h < 8; h++) - CPU.GPR[rt]._u16[h] = CPU.GPR[ra]._i16[h] == (s16)i10 ? 0xffff : 0; + CPU.GPR[rt]._u16[h] = (CPU.GPR[ra]._i16[h] == (s16)i10) ? 0xffff : 0; } void CEQBI(u32 rt, u32 ra, s32 i10) { for (int b = 0; b < 16; b++) - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b] == (u8)(i10 & 0xff) ? 0xff : 0; + CPU.GPR[rt]._i8[b] = (CPU.GPR[ra]._i8[b] == (s8)(i10 & 0xff)) ? 0xff : 0; } void HEQI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + if(CPU.GPR[ra]._i32[3] == i10) CPU.Stop(); } //0 - 6 void HBRA(s32 ro, s32 i16) { - UNIMPLEMENTED(); } void HBRR(s32 ro, s32 i16) { } void ILA(u32 rt, s32 i18) { - CPU.GPR[rt]._u32[0] = i18; - CPU.GPR[rt]._u32[1] = i18; - CPU.GPR[rt]._u32[2] = i18; - CPU.GPR[rt]._u32[3] = i18; + CPU.GPR[rt]._u32[0] = + CPU.GPR[rt]._u32[1] = + CPU.GPR[rt]._u32[2] = + CPU.GPR[rt]._u32[3] = i18 & 0x3FFFF; } //0 - 3 @@ -1326,7 +1324,7 @@ private: void MPYA(u32 rc, u32 ra, u32 rb, u32 rt) { for (int w = 0; w < 4; w++) - CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i16[w*2 + 1] * CPU.GPR[rb]._i16[w*2 + 1] + CPU.GPR[rc]._i32[w]; + CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i16[w*2] * CPU.GPR[rb]._i16[w*2] + CPU.GPR[rc]._i32[w]; } void FNMS(u32 rt, u32 ra, u32 rb, u32 rc) { diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index 4cda6c586b..ade6b92574 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -204,6 +204,7 @@ union SPU_GPR_hdr { u128 _u128; s128 _i128; + __m128 _m128; u64 _u64[2]; s64 _i64[2]; u32 _u32[4]; From 1cfcc742d8937d1533c9ce26a3b092640079edd9 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sun, 1 Dec 2013 21:30:40 +0400 Subject: [PATCH 6/9] SPU Fixes 3 Almost all implemented instructions are fixed. Partialy tested. Some instructions are still unimplemented. --- rpcs3/Emu/Cell/SPUInterpreter.h | 201 +++++++++++++++++--------------- 1 file changed, 108 insertions(+), 93 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index 07cb71baf6..85b22a667f 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -301,7 +301,7 @@ private: } void STQX(u32 rt, u32 ra, u32 rb) { - u32 lsa = (CPU.GPR[ra]._u32[3] + CPU.GPR[rb]._u32[3]) & 0xFFFFFFF0; + u32 lsa = (CPU.GPR[ra]._u32[3] + CPU.GPR[rb]._u32[3]) & 0x3fff0; if(!CPU.IsGoodLSA(lsa)) { ConLog.Error("STQX: bad lsa (0x%x)", lsa); @@ -317,9 +317,10 @@ private: } void BISL(u32 rt, u32 ra) { - CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); + const u32 NewPC = CPU.GPR[ra]._u32[3]; CPU.GPR[rt].Reset(); - CPU.GPR[rt]._u32[3] = CPU.PC + 4; + CPU.GPR[rt]._u32[3] = CPU.PC + 4; + CPU.SetBranch(branchTarget(NewPC, 0)); } void IRET(u32 ra) { @@ -335,40 +336,48 @@ private: } void GB(u32 rt, u32 ra) { - CPU.GPR[rt].Reset(); CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[0] & 1) | ((CPU.GPR[ra]._u32[1] & 1) << 1) | ((CPU.GPR[ra]._u32[2] & 1) << 2) | ((CPU.GPR[ra]._u32[3] & 1) << 3); + CPU.GPR[rt]._u32[2] = 0; + CPU.GPR[rt]._u64[0] = 0; } void GBH(u32 rt, u32 ra) { - CPU.GPR[rt].Reset(); - + u32 temp = 0; for (int h = 0; h < 8; h++) - CPU.GPR[rt]._u32[3] |= (CPU.GPR[ra]._u16[h] & 1) << h; + temp |= (CPU.GPR[ra]._u16[h] & 1) << h; + CPU.GPR[rt]._u32[3] = temp; + CPU.GPR[rt]._u32[2] = 0; + CPU.GPR[rt]._u64[0] = 0; } void GBB(u32 rt, u32 ra) { - CPU.GPR[rt].Reset(); - + u32 temp; for (int b = 0; b < 16; b++) - CPU.GPR[rt]._u32[3] |= (CPU.GPR[ra]._u8[b] & 1) << b; + temp |= (CPU.GPR[ra]._u8[b] & 1) << b; + CPU.GPR[rt]._u32[3] = temp; + CPU.GPR[rt]._u32[2] = 0; + CPU.GPR[rt]._u64[0] = 0; } void FSM(u32 rt, u32 ra) { + const u32 pref = CPU.GPR[ra]._u32[3]; for (int w = 0; w < 4; w++) - CPU.GPR[rt]._u32[w] = (CPU.GPR[ra]._u32[3] & (1 << w)) ? ~0 : 0; + CPU.GPR[rt]._u32[w] = (pref & (1 << w)) ? ~0 : 0; } void FSMH(u32 rt, u32 ra) { + const u32 pref = CPU.GPR[ra]._u32[3]; for (int h = 0; h < 8; h++) - CPU.GPR[rt]._u16[h] = (CPU.GPR[ra]._u32[3] & (1 << h)) ? ~0 : 0; + CPU.GPR[rt]._u16[h] = (pref & (1 << h)) ? ~0 : 0; } void FSMB(u32 rt, u32 ra) { + const u32 pref = CPU.GPR[ra]._u32[3]; for (int b = 0; b < 16; b++) - CPU.GPR[rt]._u8[b] = (CPU.GPR[ra]._u32[3] & (1 << b)) ? ~0 : 0; + CPU.GPR[rt]._u8[b] = (pref & (1 << b)) ? ~0 : 0; } void FREST(u32 rt, u32 ra) { @@ -386,7 +395,7 @@ private: } void LQX(u32 rt, u32 ra, u32 rb) { - u32 lsa = (CPU.GPR[ra]._u32[3] + CPU.GPR[rb]._u32[3]) & 0xFFFFFFF0; + u32 lsa = (CPU.GPR[ra]._u32[3] + CPU.GPR[rb]._u32[3]) & 0x3fff0; if(!CPU.IsGoodLSA(lsa)) { ConLog.Error("LQX: bad lsa (0x%x)", lsa); @@ -399,25 +408,25 @@ private: void ROTQBYBI(u32 rt, u32 ra, u32 rb) { const int s = (CPU.GPR[rb]._u32[3] >> 3) & 0xf; - + const SPU_GPR_hdr temp = CPU.GPR[ra]; for (int b = 0; b < 16; b++) - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[(b - s) & 0xf]; + CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf]; } void ROTQMBYBI(u32 rt, u32 ra, u32 rb) { const int s = (0 - (CPU.GPR[rb]._u32[3] >> 3)) & 0x1f; - + const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt].Reset(); for (int b = 0; b < 16 - s; b++) - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b + s]; + CPU.GPR[rt]._u8[b] = temp._u8[b + s]; } void SHLQBYBI(u32 rt, u32 ra, u32 rb) { const int s = (CPU.GPR[rb]._u32[3] >> 3) & 0x1f; - + const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt].Reset(); for (int b = s; b < 16; b++) - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b - s]; + CPU.GPR[rt]._u8[b] = temp._u8[b - s]; } void CBX(u32 rt, u32 ra, u32 rb) { @@ -454,57 +463,58 @@ private: void ROTQBI(u32 rt, u32 ra, u32 rb) { const int t = CPU.GPR[rb]._u32[3] & 0x7; - - CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] << t) | (CPU.GPR[ra]._u32[3] >> (32 - t)); - CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] << t) | (CPU.GPR[ra]._u32[0] >> (32 - t)); - CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] << t) | (CPU.GPR[ra]._u32[1] >> (32 - t)); - CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] << t) | (CPU.GPR[ra]._u32[2] >> (32 - t)); + const SPU_GPR_hdr temp = CPU.GPR[ra]; + CPU.GPR[rt]._u32[0] = (temp._u32[0] << t) | (temp._u32[3] >> (32 - t)); + CPU.GPR[rt]._u32[1] = (temp._u32[1] << t) | (temp._u32[0] >> (32 - t)); + CPU.GPR[rt]._u32[2] = (temp._u32[2] << t) | (temp._u32[1] >> (32 - t)); + CPU.GPR[rt]._u32[3] = (temp._u32[3] << t) | (temp._u32[2] >> (32 - t)); } void ROTQMBI(u32 rt, u32 ra, u32 rb) { const int t = (0 - CPU.GPR[rb]._u32[3]) & 0x7; - - CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] >> t) | (CPU.GPR[ra]._u32[1] << (32 - t)); - CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] >> t) | (CPU.GPR[ra]._u32[2] << (32 - t)); - CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] >> t) | (CPU.GPR[ra]._u32[3] << (32 - t)); + const SPU_GPR_hdr temp = CPU.GPR[ra]; + CPU.GPR[rt]._u32[0] = (temp._u32[0] >> t) | (temp._u32[1] << (32 - t)); + CPU.GPR[rt]._u32[1] = (temp._u32[1] >> t) | (temp._u32[2] << (32 - t)); + CPU.GPR[rt]._u32[2] = (temp._u32[2] >> t) | (temp._u32[3] << (32 - t)); CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] >> t); } void SHLQBI(u32 rt, u32 ra, u32 rb) { const int t = CPU.GPR[rb]._u32[3] & 0x7; - - CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] << t); - CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] << t) | (CPU.GPR[ra]._u32[0] >> (32 - t)); - CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] << t) | (CPU.GPR[ra]._u32[1] >> (32 - t)); - CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] << t) | (CPU.GPR[ra]._u32[2] >> (32 - t)); + const SPU_GPR_hdr temp = CPU.GPR[ra]; + CPU.GPR[rt]._u32[0] = (temp._u32[0] << t); + CPU.GPR[rt]._u32[1] = (temp._u32[1] << t) | (temp._u32[0] >> (32 - t)); + CPU.GPR[rt]._u32[2] = (temp._u32[2] << t) | (temp._u32[1] >> (32 - t)); + CPU.GPR[rt]._u32[3] = (temp._u32[3] << t) | (temp._u32[2] >> (32 - t)); } void ROTQBY(u32 rt, u32 ra, u32 rb) { const int s = CPU.GPR[rb]._u32[3] & 0xf; - + const SPU_GPR_hdr temp = CPU.GPR[ra]; for (int b = 0; b < 16; ++b) - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[(b - s) & 0xf]; + CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf]; } void ROTQMBY(u32 rt, u32 ra, u32 rb) { const int s = (0 - CPU.GPR[rb]._u32[3]) & 0x1f; - + const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt].Reset(); for (int b = 0; b < 16 - s; b++) - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b + s]; + CPU.GPR[rt]._u8[b] = temp._u8[b + s]; } void SHLQBY(u32 rt, u32 ra, u32 rb) { const int s = CPU.GPR[rb]._u32[3] & 0x1f; - + const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt].Reset(); for (int b = s; b < 16; b++) - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b - s]; + CPU.GPR[rt]._u8[b] = temp._u8[b - s]; } void ORX(u32 rt, u32 ra) { + const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt].Reset(); - CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._u32[0] | CPU.GPR[ra]._u32[1] | CPU.GPR[ra]._u32[2] | CPU.GPR[ra]._u32[3]; + CPU.GPR[rt]._u32[3] = temp._u32[0] | temp._u32[1] | temp._u32[2] | temp._u32[3]; } void CBD(u32 rt, u32 ra, s32 i7) { @@ -541,52 +551,52 @@ private: void ROTQBII(u32 rt, u32 ra, s32 i7) { const int s = i7 & 0x7; - - CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] << s) | (CPU.GPR[ra]._u32[3] >> (32 - s)); - CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] << s) | (CPU.GPR[ra]._u32[0] >> (32 - s)); - CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] << s) | (CPU.GPR[ra]._u32[1] >> (32 - s)); - CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] << s) | (CPU.GPR[ra]._u32[2] >> (32 - s)); + const SPU_GPR_hdr temp = CPU.GPR[ra]; + CPU.GPR[rt]._u32[0] = (temp._u32[0] << s) | (temp._u32[3] >> (32 - s)); + CPU.GPR[rt]._u32[1] = (temp._u32[1] << s) | (temp._u32[0] >> (32 - s)); + CPU.GPR[rt]._u32[2] = (temp._u32[2] << s) | (temp._u32[1] >> (32 - s)); + CPU.GPR[rt]._u32[3] = (temp._u32[3] << s) | (temp._u32[2] >> (32 - s)); } void ROTQMBII(u32 rt, u32 ra, s32 i7) { const int s = (0 - i7) & 0x7; - - CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] >> s) | (CPU.GPR[ra]._u32[1] << (32 - s)); - CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] >> s) | (CPU.GPR[ra]._u32[2] << (32 - s)); - CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] >> s) | (CPU.GPR[ra]._u32[3] << (32 - s)); - CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] >> s); + const SPU_GPR_hdr temp = CPU.GPR[ra]; + CPU.GPR[rt]._u32[0] = (temp._u32[0] >> s) | (temp._u32[1] << (32 - s)); + CPU.GPR[rt]._u32[1] = (temp._u32[1] >> s) | (temp._u32[2] << (32 - s)); + CPU.GPR[rt]._u32[2] = (temp._u32[2] >> s) | (temp._u32[3] << (32 - s)); + CPU.GPR[rt]._u32[3] = (temp._u32[3] >> s); } void SHLQBII(u32 rt, u32 ra, s32 i7) { const int s = i7 & 0x7; - - CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] << s); - CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] << s) | (CPU.GPR[ra]._u32[0] >> (32 - s)); - CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] << s) | (CPU.GPR[ra]._u32[1] >> (32 - s)); - CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] << s) | (CPU.GPR[ra]._u32[2] >> (32 - s)); + const SPU_GPR_hdr temp = CPU.GPR[ra]; + CPU.GPR[rt]._u32[0] = (temp._u32[0] << s); + CPU.GPR[rt]._u32[1] = (temp._u32[1] << s) | (temp._u32[0] >> (32 - s)); + CPU.GPR[rt]._u32[2] = (temp._u32[2] << s) | (temp._u32[1] >> (32 - s)); + CPU.GPR[rt]._u32[3] = (temp._u32[3] << s) | (temp._u32[2] >> (32 - s)); } void ROTQBYI(u32 rt, u32 ra, s32 i7) { const int s = i7 & 0xf; - + const SPU_GPR_hdr temp = CPU.GPR[ra]; for (int b = 0; b < 16; b++) - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[(b - s) & 0xf]; + CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf]; } void ROTQMBYI(u32 rt, u32 ra, s32 i7) { const int s = (0 - i7) & 0x1f; - + const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt].Reset(); for (int b = 0; b < 16 - s; b++) - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b + s]; + CPU.GPR[rt]._u8[b] = temp._u8[b + s]; } void SHLQBYI(u32 rt, u32 ra, s32 i7) { const int s = i7 & 0x1f; - + const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt].Reset(); for (int b = s; b < 16; b++) - CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b - s]; + CPU.GPR[rt]._u8[b] = temp._u8[b - s]; } void NOP(u32 rt) { @@ -618,10 +628,12 @@ private: } void SUMB(u32 rt, u32 ra, u32 rb) { + const SPU_GPR_hdr _a = CPU.GPR[ra]; + const SPU_GPR_hdr _b = CPU.GPR[rb]; for (int w = 0; w < 4; w++) { - CPU.GPR[rt]._u16[w*2] = CPU.GPR[ra]._u8[w*4] + CPU.GPR[ra]._u8[w*4 + 1] + CPU.GPR[ra]._u8[w*4 + 2] + CPU.GPR[ra]._u8[w*4 + 3]; - CPU.GPR[rt]._u16[w*2 + 1] = CPU.GPR[rb]._u8[w*4] + CPU.GPR[rb]._u8[w*4 + 1] + CPU.GPR[rb]._u8[w*4 + 2] + CPU.GPR[rb]._u8[w*4 + 3]; + CPU.GPR[rt]._u16[w*2] = _a._u8[w*4] + _a._u8[w*4 + 1] + _a._u8[w*4 + 2] + _a._u8[w*4 + 3]; + CPU.GPR[rt]._u16[w*2 + 1] = _b._u8[w*4] + _b._u8[w*4 + 1] + _b._u8[w*4 + 2] + _b._u8[w*4 + 3]; } } //HGT uses signed values. HLGT uses unsigned values @@ -654,11 +666,11 @@ private: } void CNTB(u32 rt, u32 ra) { + const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt].Reset(); - for (int b = 0; b < 16; b++) for (int i = 0; i < 8; i++) - CPU.GPR[rt]._u8[b] += (CPU.GPR[ra]._u8[b] & (1 << i)) ? 1 : 0; + CPU.GPR[rt]._u8[b] += (temp._u8[b] & (1 << i)) ? 1 : 0; } void XSBH(u32 rt, u32 ra) { @@ -851,50 +863,51 @@ private: const u64 DoubleExpMask = 0x7ff0000000000000; const u64 DoubleFracMask = 0x000fffffffffffff; const u64 DoubleSignMask = 0x8000000000000000; + const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt].Reset(); if (i7 & 1) //Negative Denorm Check (-, exp is zero, frac is non-zero) for (int i = 0; i < 2; i++) { - if (CPU.GPR[ra]._u64[i] & DoubleFracMask) - if (CPU.GPR[ra]._u64[i] & DoubleSignMask & DoubleExpMask == DoubleSignMask) + if (temp._u64[i] & DoubleFracMask) + if (temp._u64[i] & DoubleSignMask & DoubleExpMask == DoubleSignMask) CPU.GPR[rt]._u64[i] = 0xffffffffffffffff; } if (i7 & 2) //Positive Denorm Check (+, exp is zero, frac is non-zero) for (int i = 0; i < 2; i++) { - if (CPU.GPR[ra]._u64[i] & DoubleFracMask) - if (CPU.GPR[ra]._u64[i] & DoubleSignMask & DoubleExpMask == 0) + if (temp._u64[i] & DoubleFracMask) + if (temp._u64[i] & DoubleSignMask & DoubleExpMask == 0) CPU.GPR[rt]._u64[i] = 0xffffffffffffffff; } if (i7 & 4) //Negative Zero Check (-, exp is zero, frac is zero) for (int i = 0; i < 2; i++) { - if (CPU.GPR[ra]._u64[i] == DoubleSignMask) + if (temp._u64[i] == DoubleSignMask) CPU.GPR[rt]._u64[i] = 0xffffffffffffffff; } if (i7 & 8) //Positive Zero Check (+, exp is zero, frac is zero) for (int i = 0; i < 2; i++) { - if (CPU.GPR[ra]._u64[i] == 0) + if (temp._u64[i] == 0) CPU.GPR[rt]._u64[i] = 0xffffffffffffffff; } if (i7 & 16) //Negative Infinity Check (-, exp is 0x7ff, frac is zero) for (int i = 0; i < 2; i++) { - if (CPU.GPR[ra]._u64[i] == DoubleSignMask & DoubleExpMask) + if (temp._u64[i] == DoubleSignMask & DoubleExpMask) CPU.GPR[rt]._u64[i] = 0xffffffffffffffff; } if (i7 & 32) //Positive Infinity Check (+, exp is 0x7ff, frac is zero) for (int i = 0; i < 2; i++) { - if (CPU.GPR[ra]._u64[i] == DoubleExpMask) + if (temp._u64[i] == DoubleExpMask) CPU.GPR[rt]._u64[i] = 0xffffffffffffffff; } if (i7 & 64) //Not-a-Number Check (any sign, exp is 0x7ff, frac is non-zero) for (int i = 0; i < 2; i++) { - if (CPU.GPR[ra]._u64[i] & DoubleFracMask) - if (CPU.GPR[ra]._u64[i] & DoubleExpMask == DoubleExpMask) + if (temp._u64[i] & DoubleFracMask) + if (temp._u64[i] & DoubleExpMask == DoubleExpMask) CPU.GPR[rt]._u64[i] = 0xffffffffffffffff; } } @@ -992,7 +1005,7 @@ private: } void STQA(u32 rt, s32 i16) { - u32 lsa = (i16 << 2) & 0xFFFFFFF0; + u32 lsa = (i16 << 2) & 0x3fff0; if(!CPU.IsGoodLSA(lsa)) { ConLog.Error("STQA: bad lsa (0x%x)", lsa); @@ -1019,7 +1032,7 @@ private: } void STQR(u32 rt, s32 i16) { - u32 lsa = branchTarget(CPU.PC, (i16 << 2) & 0xFFFFFFF0); + u32 lsa = branchTarget(CPU.PC, i16) & 0xFFFFFFF0; if(!CPU.IsGoodLSA(lsa)) { ConLog.Error("STQR: bad lsa (0x%x)", lsa); @@ -1035,7 +1048,7 @@ private: } void LQA(u32 rt, s32 i16) { - u32 lsa = (i16 << 2) & 0xFFFFFFF0; + u32 lsa = (i16 << 2) & 0x3fff0; if(!CPU.IsGoodLSA(lsa)) { ConLog.Error("LQA: bad lsa (0x%x)", lsa); @@ -1047,9 +1060,9 @@ private: } void BRASL(u32 rt, s32 i16) { - CPU.SetBranch(branchTarget(0, i16)); CPU.GPR[rt].Reset(); CPU.GPR[rt]._u32[3] = CPU.PC + 4; + CPU.SetBranch(branchTarget(0, i16)); } void BR(s32 i16) { @@ -1073,13 +1086,13 @@ private: } void BRSL(u32 rt, s32 i16) { - CPU.SetBranch(branchTarget(CPU.PC, i16)); CPU.GPR[rt].Reset(); CPU.GPR[rt]._u32[3] = CPU.PC + 4; + CPU.SetBranch(branchTarget(CPU.PC, i16)); } void LQR(u32 rt, s32 i16) { - u32 lsa = branchTarget(CPU.PC, i16); + u32 lsa = branchTarget(CPU.PC, i16) & 0xFFFFFFF0; if(!CPU.IsGoodLSA(lsa)) { ConLog.Error("LQR: bad lsa (0x%x)", lsa); @@ -1164,9 +1177,9 @@ private: for(u32 h = 0; h < 8; ++h) CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] + i10; } - void STQD(u32 rt, s32 i10, u32 ra) //hello_world addr=0x178, value won't be saved + void STQD(u32 rt, s32 i10, u32 ra) //i10 is shifted left by 4 while decoding { - const u32 lsa = (CPU.GPR[ra]._u32[3] + (i10 << 4)) & 0xFFFFFFF0; + const u32 lsa = (CPU.GPR[ra]._i32[3] + i10) & 0x3fff0; if(!CPU.IsGoodLSA(lsa)) { ConLog.Error("STQD: bad lsa (0x%x)", lsa); @@ -1175,9 +1188,9 @@ private: } CPU.WriteLS128(lsa, CPU.GPR[rt]._u128); } - void LQD(u32 rt, s32 i10, u32 ra) + void LQD(u32 rt, s32 i10, u32 ra) //i10 is shifted left by 4 while decoding { - const u32 lsa = (CPU.GPR[ra]._u32[3] + (i10 << 4)) & 0xFFFFFFF0; + const u32 lsa = (CPU.GPR[ra]._i32[3] + i10) & 0x3fff0; if(!CPU.IsGoodLSA(lsa)) { ConLog.Error("LQD: bad lsa (0x%x)", lsa); @@ -1277,7 +1290,7 @@ private: //0 - 6 void HBRA(s32 ro, s32 i16) - { + { //i16 is shifted left by 2 while decoding } void HBRR(s32 ro, s32 i16) { @@ -1293,15 +1306,17 @@ private: //0 - 3 void SELB(u32 rt, u32 ra, u32 rb, u32 rc) { - for(u32 i = 0; i < 4; ++i) + for(u64 i = 0; i < 2; ++i) { - CPU.GPR[rt]._u32[i] = - ( CPU.GPR[rc]._u32[i] & CPU.GPR[rb]._u32[i]) | - (~CPU.GPR[rc]._u32[i] & CPU.GPR[ra]._u32[i]); + CPU.GPR[rt]._u64[i] = + ( CPU.GPR[rc]._u64[i] & CPU.GPR[rb]._u64[i]) | + (~CPU.GPR[rc]._u64[i] & CPU.GPR[ra]._u64[i]); } } void SHUFB(u32 rt, u32 ra, u32 rb, u32 rc) { + const SPU_GPR_hdr _a = CPU.GPR[ra]; + const SPU_GPR_hdr _b = CPU.GPR[rb]; for (int i = 0; i < 16; i++) { u8 b = CPU.GPR[rc]._u8[i]; @@ -1315,9 +1330,9 @@ private: CPU.GPR[rt]._u8[i] = 0x00; } else { if(b & 0x10) - CPU.GPR[rt]._u8[i] = CPU.GPR[rb]._u8[15 - (b & 0x0F)]; + CPU.GPR[rt]._u8[i] = _b._u8[15 - (b & 0x0F)]; else - CPU.GPR[rt]._u8[i] = CPU.GPR[ra]._u8[15 - (b & 0x0F)]; + CPU.GPR[rt]._u8[i] = _a._u8[15 - (b & 0x0F)]; } } } From 38fabf7cd2328bda4ecdec86bbe92d4e794de2d8 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Mon, 2 Dec 2013 14:40:58 +0400 Subject: [PATCH 7/9] SPU Fixes 3.1 Small fixes --- rpcs3/Emu/Cell/SPUInterpreter.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index 85b22a667f..ad2721572c 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -287,12 +287,12 @@ private: } void BIHZ(u32 rt, u32 ra) { - if(CPU.GPR[rt]._u16[7] == 0) + if(CPU.GPR[rt]._u16[6] == 0) CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); } void BIHNZ(u32 rt, u32 ra) { - if(CPU.GPR[rt]._u16[7] != 0) + if(CPU.GPR[rt]._u16[6] != 0) CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); } void STOPD(u32 rc, u32 ra, u32 rb) @@ -512,9 +512,9 @@ private: } void ORX(u32 rt, u32 ra) { - const SPU_GPR_hdr temp = CPU.GPR[ra]; - CPU.GPR[rt].Reset(); - CPU.GPR[rt]._u32[3] = temp._u32[0] | temp._u32[1] | temp._u32[2] | temp._u32[3]; + CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._u32[0] | CPU.GPR[ra]._u32[1] | CPU.GPR[ra]._u32[2] | CPU.GPR[ra]._u32[3]; + CPU.GPR[rt]._u32[2] = 0; + CPU.GPR[rt]._u64[0] = 0; } void CBD(u32 rt, u32 ra, s32 i7) { @@ -1022,12 +1022,12 @@ private: } void BRHZ(u32 rt, s32 i16) { - if (CPU.GPR[rt]._u16[7] == 0) + if (CPU.GPR[rt]._u16[6] == 0) CPU.SetBranch(branchTarget(CPU.PC, i16)); } void BRHNZ(u32 rt, s32 i16) { - if (CPU.GPR[rt]._u16[7] != 0) + if (CPU.GPR[rt]._u16[6] != 0) CPU.SetBranch(branchTarget(CPU.PC, i16)); } void STQR(u32 rt, s32 i16) From 73c2628ef40d52845081fdb0c2ea20b6db474cbf Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Mon, 2 Dec 2013 22:49:06 +0400 Subject: [PATCH 8/9] SPU Fixes 3.2 --- rpcs3/Emu/Cell/SPUInterpreter.h | 12 +++---- rpcs3/Emu/Cell/SPUThread.h | 63 +++++++++++++++++++++++++++++++-- rpcs3/rpcs3.vcxproj | 3 +- rpcs3/rpcs3.vcxproj.filters | 5 ++- 4 files changed, 73 insertions(+), 10 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index ad2721572c..160febdbca 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -390,8 +390,8 @@ private: //(SSE) RSQRTPS - Compute Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values //rt = approximate(1/sqrt(abs(ra))) //abs(ra) === ra & FloatAbsMask - const __m128 FloatAbsMask = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; - CPU.GPR[rt]._m128 = _mm_rsqrt_ps(_mm_and_ps(CPU.GPR[ra]._m128, FloatAbsMask)); + const __m128i FloatAbsMask = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; + CPU.GPR[rt]._m128 = _mm_rsqrt_ps(_mm_and_ps(CPU.GPR[ra]._m128, (__m128&)FloatAbsMask)); } void LQX(u32 rt, u32 ra, u32 rb) { @@ -869,14 +869,14 @@ private: for (int i = 0; i < 2; i++) { if (temp._u64[i] & DoubleFracMask) - if (temp._u64[i] & DoubleSignMask & DoubleExpMask == DoubleSignMask) + if ((temp._u64[i] & (DoubleSignMask & DoubleExpMask)) == DoubleSignMask) CPU.GPR[rt]._u64[i] = 0xffffffffffffffff; } if (i7 & 2) //Positive Denorm Check (+, exp is zero, frac is non-zero) for (int i = 0; i < 2; i++) { if (temp._u64[i] & DoubleFracMask) - if (temp._u64[i] & DoubleSignMask & DoubleExpMask == 0) + if ((temp._u64[i] & (DoubleSignMask & DoubleExpMask)) == 0) CPU.GPR[rt]._u64[i] = 0xffffffffffffffff; } if (i7 & 4) //Negative Zero Check (-, exp is zero, frac is zero) @@ -894,7 +894,7 @@ private: if (i7 & 16) //Negative Infinity Check (-, exp is 0x7ff, frac is zero) for (int i = 0; i < 2; i++) { - if (temp._u64[i] == DoubleSignMask & DoubleExpMask) + if (temp._u64[i] == (DoubleSignMask & DoubleExpMask)) CPU.GPR[rt]._u64[i] = 0xffffffffffffffff; } if (i7 & 32) //Positive Infinity Check (+, exp is 0x7ff, frac is zero) @@ -907,7 +907,7 @@ private: for (int i = 0; i < 2; i++) { if (temp._u64[i] & DoubleFracMask) - if (temp._u64[i] & DoubleExpMask == DoubleExpMask) + if ((temp._u64[i] & DoubleExpMask) == DoubleExpMask) CPU.GPR[rt]._u64[i] = 0xffffffffffffffff; } } diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index ade6b92574..9db744895b 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -233,13 +233,13 @@ union SPU_SPR_hdr { u128 _u128; s128 _i128; - + u32 _u32[4]; SPU_SPR_hdr() {} wxString ToString() const { - return wxString::Format("%16%16", _u128.hi, _u128.lo); + return wxString::Format("%08x%08x%08x%08x", _u32[3], _u32[2], _u32[1], _u32[0]); } void Reset() @@ -356,6 +356,34 @@ public: { switch(ch) { + case SPU_RdEventStat: //Read event status with mask applied + case SPU_WrEventMask: //Write event mask + case SPU_WrEventAck: //Write end of event processing + case SPU_RdSigNotify1: //Signal notification 1 + case SPU_RdSigNotify2: //Signal notification 2 + case SPU_WrDec: //Write decrementer count + case SPU_RdDec: //Read decrementer count + case SPU_RdEventMask: //Read event mask + case SPU_RdMachStat: //Read SPU run status + case SPU_WrSRR0: //Write SPU machine state save/restore register 0 (SRR0) + case SPU_RdSRR0: //Read SPU machine state save/restore register 0 (SRR0) + case MFC_WrMSSyncReq: //Write multisource synchronization request + case MFC_RdTagMask: //Read tag mask + case MFC_LSA: //Write local memory address command parameter + case MFC_EAH: //Write high order DMA effective address command parameter + case MFC_EAL: //Write low order DMA effective address command parameter + case MFC_Size: //Write DMA transfer size command parameter + case MFC_TagID: //Write tag identifier command parameter + case MFC_Cmd: //Write and enqueue DMA command with associated class ID + case MFC_WrTagMask: //Write tag mask + case MFC_WrTagUpdate: //Write request for conditional or unconditional tag status update + case MFC_RdTagStat: //Read tag status with mask applied + case MFC_RdListStallStat: //Read DMA list stall-and-notify status + case MFC_WrListStallAck: //Write DMA list stall-and-notify acknowledge + case MFC_RdAtomicStat: //Read completion status of last completed immediate MFC atomic update command + ConLog.Error("%s error: unimplemented channel (%s).", __FUNCTION__, spu_ch_name[ch]); + break; + case SPU_WrOutMbox: return SPU.Out_MBox.GetFreeCount(); @@ -379,6 +407,23 @@ public: switch(ch) { + case SPU_WrEventMask: //Write event mask + case SPU_WrEventAck: //Write end of event processing + case SPU_WrDec: //Write decrementer count + case SPU_WrSRR0: //Write SPU machine state save/restore register 0 (SRR0) + case MFC_WrMSSyncReq: //Write multisource synchronization request + case MFC_LSA: //Write local memory address command parameter + case MFC_EAH: //Write high order DMA effective address command parameter + case MFC_EAL: //Write low order DMA effective address command parameter + case MFC_Size: //Write DMA transfer size command parameter + case MFC_TagID: //Write tag identifier command parameter + case MFC_Cmd: //Write and enqueue DMA command with associated class ID + case MFC_WrTagMask: //Write tag mask + case MFC_WrTagUpdate: //Write request for conditional or unconditional tag status update + case MFC_WrListStallAck: //Write DMA list stall-and-notify acknowledge + ConLog.Error("%s error: unimplemented channel (%s).", __FUNCTION__, spu_ch_name[ch]); + break; + case SPU_WrOutIntrMbox: ConLog.Warning("SPU_WrOutIntrMbox = 0x%x", v); @@ -410,6 +455,20 @@ public: switch(ch) { + case SPU_RdEventStat: //Read event status with mask applied + case SPU_RdSigNotify1: //Signal notification 1 + case SPU_RdSigNotify2: //Signal notification 2 + case SPU_RdDec: //Read decrementer count + case SPU_RdEventMask: //Read event mask + case SPU_RdMachStat: //Read SPU run status + case SPU_RdSRR0: //Read SPU machine state save/restore register 0 (SRR0) + case MFC_RdTagMask: //Read tag mask + case MFC_RdTagStat: //Read tag status with mask applied + case MFC_RdListStallStat: //Read DMA list stall-and-notify status + case MFC_RdAtomicStat: //Read completion status of last completed immediate MFC atomic update command + ConLog.Error("%s error: unimplemented channel (%s).", __FUNCTION__, spu_ch_name[ch]); + break; + case SPU_RdInMbox: if(!SPU.In_MBox.Pop(v)) v = 0; ConLog.Warning("%s: SPU_RdInMbox(0x%x).", __FUNCTION__, v); diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj index 62a417b4d9..ae92a31c8d 100644 --- a/rpcs3/rpcs3.vcxproj +++ b/rpcs3/rpcs3.vcxproj @@ -1,4 +1,4 @@ - + @@ -322,6 +322,7 @@ + diff --git a/rpcs3/rpcs3.vcxproj.filters b/rpcs3/rpcs3.vcxproj.filters index 7ac21c6caa..e01793878c 100644 --- a/rpcs3/rpcs3.vcxproj.filters +++ b/rpcs3/rpcs3.vcxproj.filters @@ -1,4 +1,4 @@ - + @@ -510,5 +510,8 @@ Utilities + + Include + \ No newline at end of file From 241ae8364519090d830f3924ff519264b07bac8d Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Wed, 4 Dec 2013 00:35:45 +0400 Subject: [PATCH 9/9] SPU Improvement Implemented CFLTS, CFLTU, CSFLT and CUFLT. Finally fixed FREST. --- rpcs3/Emu/Cell/SPUInterpreter.h | 78 +++++++++++++++++++++++++++++---- rpcs3/Emu/Cell/SPUThread.h | 1 + 2 files changed, 70 insertions(+), 9 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index 160febdbca..23832209e3 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -7,6 +7,13 @@ #define UNIMPLEMENTED() UNK(__FUNCTION__) +typedef union _CRT_ALIGN(16) __u32x4 { + unsigned __int32 _u32[4]; + __m128i m128i; + __m128 m128; + __m128d m128d; + } __u32x4; + class SPUInterpreter : public SPUOpcodes { private: @@ -390,8 +397,8 @@ private: //(SSE) RSQRTPS - Compute Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values //rt = approximate(1/sqrt(abs(ra))) //abs(ra) === ra & FloatAbsMask - const __m128i FloatAbsMask = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; - CPU.GPR[rt]._m128 = _mm_rsqrt_ps(_mm_and_ps(CPU.GPR[ra]._m128, (__m128&)FloatAbsMask)); + const __u32x4 FloatAbsMask = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; + CPU.GPR[rt]._m128 = _mm_rsqrt_ps(_mm_and_ps(CPU.GPR[ra]._m128, FloatAbsMask.m128)); } void LQX(u32 rt, u32 ra, u32 rb) { @@ -972,7 +979,9 @@ private: } void FI(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + //Floating Interpolation: ra will be ignored. + //It should work correctly if result of preceding FREST or FRSQEST is sufficiently exact + CPU.GPR[rt] = CPU.GPR[rb]; } void HEQ(u32 rt, u32 ra, u32 rb) { @@ -982,19 +991,70 @@ private: //0 - 9 void CFLTS(u32 rt, u32 ra, s32 i8) { - UNIMPLEMENTED(); + const u32 scale = 173 - (i8 & 0xff); //unsigned immediate + for (int i = 0; i < 4; i++) + { + u32 exp = ((CPU.GPR[ra]._u32[i] >> 23) & 0xff) + scale; + + if (exp > 255) + exp = 255; + + CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._u32[i] & 0x807fffff) | (exp << 23); + } + //(SSE2) CVTTPS2DQ - Convert with Truncation Packed Single FP to Packed Dword Int + CPU.GPR[rt]._m128i = _mm_cvttps_epi32(CPU.GPR[rt]._m128); } void CFLTU(u32 rt, u32 ra, s32 i8) { - UNIMPLEMENTED(); + const u32 scale = 173 - (i8 & 0xff); //unsigned immediate + for (int i = 0; i < 4; i++) + { + u32 exp = ((CPU.GPR[ra]._u32[i] >> 23) & 0xff) + scale; + + if (exp > 255) + exp = 255; + + if (CPU.GPR[ra]._u32[i] & 0x80000000) //if negative, result = 0 + CPU.GPR[rt]._u32[i] = 0; + else + { + CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._u32[i] & 0x807fffff) | (exp << 23); + + if (CPU.GPR[rt]._f[i] > 0xffffffff) //if big, result = max + CPU.GPR[rt]._u32[i] = 0xffffffff; + else + CPU.GPR[rt]._u32[i] = floor(CPU.GPR[rt]._f[i]); + } + } } void CSFLT(u32 rt, u32 ra, s32 i8) { - UNIMPLEMENTED(); + //(SSE2) CVTDQ2PS - Convert Packed Dword Integers to Packed Single-Precision FP Values + CPU.GPR[rt]._m128 = _mm_cvtepi32_ps(CPU.GPR[ra]._m128i); + const u32 scale = 155 - (i8 & 0xff); //unsigned immediate + for (int i = 0; i < 4; i++) + { + u32 exp = ((CPU.GPR[rt]._u32[i] >> 23) & 0xff) - scale; + + if (exp > 255) //< 0 + exp = 0; + + CPU.GPR[rt]._u32[i] = (CPU.GPR[rt]._u32[i] & 0x807fffff) | (exp << 23); + } } void CUFLT(u32 rt, u32 ra, s32 i8) { - UNIMPLEMENTED(); + const u32 scale = 155 - (i8 & 0xff); //unsigned immediate + for (int i = 0; i < 4; i++) + { + CPU.GPR[rt]._f[i] = (float)CPU.GPR[ra]._u32[i]; + u32 exp = ((CPU.GPR[rt]._u32[i] >> 23) & 0xff) - scale; + + if (exp > 255) //< 0 + exp = 0; + + CPU.GPR[rt]._u32[i] = (CPU.GPR[rt]._u32[i] & 0x807fffff) | (exp << 23); + } } //0 - 8 @@ -1169,8 +1229,8 @@ private: } void AI(u32 rt, u32 ra, s32 i10) { - for(u32 i = 0; i < 4; ++i) - CPU.GPR[rt]._i32[i] = CPU.GPR[ra]._i32[i] + i10; + const __u32x4 imm = {i10, i10, i10, i10}; + CPU.GPR[rt]._m128i = _mm_add_epi32(CPU.GPR[ra]._m128i, imm.m128i); } void AHI(u32 rt, u32 ra, s32 i10) { diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index 9db744895b..cf47369de4 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -205,6 +205,7 @@ union SPU_GPR_hdr u128 _u128; s128 _i128; __m128 _m128; + __m128i _m128i; u64 _u64[2]; s64 _i64[2]; u32 _u32[4];