From ae4420e6046d4ad3c5790f396c88ea4184b5ebd5 Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:00:58 +0900 Subject: [PATCH 01/21] Implement missing PPU instructions. --- rpcs3/Emu/Cell/PPUDisAsm.h | 4 + rpcs3/Emu/Cell/PPUInstrTable.h | 48 +++++--- rpcs3/Emu/Cell/PPUInterpreter.h | 169 +++++++++++++++++---------- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 10 ++ rpcs3/Emu/Cell/PPULLVMRecompiler.h | 1 + rpcs3/Emu/Cell/PPUOpcodes.h | 19 +++ rpcs3/Emu/Cell/PPUThread.h | 8 +- 7 files changed, 179 insertions(+), 80 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUDisAsm.h b/rpcs3/Emu/Cell/PPUDisAsm.h index 531b21bc37..228c9edd4e 100644 --- a/rpcs3/Emu/Cell/PPUDisAsm.h +++ b/rpcs3/Emu/Cell/PPUDisAsm.h @@ -1675,6 +1675,10 @@ private: { DisAsm_V1_R2("stvlx", vs, ra, rb); } + void STDBRX(u32 rs, u32 ra, u32 rb) + { + DisAsm_R3("stdbrx", rs, ra, rb); + } void STSWX(u32 rs, u32 ra, u32 rb) { DisAsm_R3("swswx", rs, ra, rb); diff --git a/rpcs3/Emu/Cell/PPUInstrTable.h b/rpcs3/Emu/Cell/PPUInstrTable.h index c9f03c789d..a22627f2ae 100644 --- a/rpcs3/Emu/Cell/PPUInstrTable.h +++ b/rpcs3/Emu/Cell/PPUInstrTable.h @@ -226,6 +226,9 @@ namespace PPU_instr #define bind_instr(list, name, ...) \ static const auto& name = make_instr(list, #name, &PPUOpcodes::name, ##__VA_ARGS__) +#define bind_instr_oe(list, name, ...) \ + bind_instr(list, name, ##__VA_ARGS__); \ + static const auto& name##O = make_instr(list, #name "O", &PPUOpcodes::name, ##__VA_ARGS__) bind_instr(main_list, TDI, TO, RA, simm16); bind_instr(main_list, TWI, TO, RA, simm16); @@ -456,9 +459,9 @@ namespace PPU_instr /*0x004*/bind_instr(g1f_list, TW, TO, RA, RB); /*0x006*/bind_instr(g1f_list, LVSL, VD, RA, RB); /*0x007*/bind_instr(g1f_list, LVEBX, VD, RA, RB); - /*0x008*/bind_instr(g1f_list, SUBFC, RD, RA, RB, OE, RC); + /*0x008*/bind_instr_oe(g1f_list, SUBFC, RD, RA, RB, OE, RC); /*0x009*/bind_instr(g1f_list, MULHDU, RD, RA, RB, RC); - /*0x00a*/bind_instr(g1f_list, ADDC, RD, RA, RB, OE, RC); + /*0x00a*/bind_instr_oe(g1f_list, ADDC, RD, RA, RB, OE, RC); /*0x00b*/bind_instr(g1f_list, MULHWU, RD, RA, RB, RC); /*0x013*/bind_instr(g1f_list, MFOCRF, L_11, RD, CRM); /*0x014*/bind_instr(g1f_list, LWARX, RD, RA, RB); @@ -471,7 +474,7 @@ namespace PPU_instr /*0x020*/bind_instr(g1f_list, CMPL, CRFD, L_10, RA, RB); /*0x026*/bind_instr(g1f_list, LVSR, VD, RA, RB); /*0x027*/bind_instr(g1f_list, LVEHX, VD, RA, RB); - /*0x028*/bind_instr(g1f_list, SUBF, RD, RA, RB, OE, RC); + /*0x028*/bind_instr_oe(g1f_list, SUBF, RD, RA, RB, OE, RC); /*0x035*/bind_instr(g1f_list, LDUX, RD, RA, RB); /*0x036*/bind_instr(g1f_list, DCBST, RA, RB); /*0x037*/bind_instr(g1f_list, LWZUX, RD, RA, RB); @@ -485,12 +488,12 @@ namespace PPU_instr /*0x056*/bind_instr(g1f_list, DCBF, RA, RB); /*0x057*/bind_instr(g1f_list, LBZX, RD, RA, RB); /*0x067*/bind_instr(g1f_list, LVX, VD, RA, RB); - /*0x068*/bind_instr(g1f_list, NEG, RD, RA, OE, RC); + /*0x068*/bind_instr_oe(g1f_list, NEG, RD, RA, OE, RC); /*0x077*/bind_instr(g1f_list, LBZUX, RD, RA, RB); /*0x07c*/bind_instr(g1f_list, NOR, RA, RS, RB, RC); /*0x087*/bind_instr(g1f_list, STVEBX, VS, RA, RB); - /*0x088*/bind_instr(g1f_list, SUBFE, RD, RA, RB, OE, RC); - /*0x08a*/bind_instr(g1f_list, ADDE, RD, RA, RB, OE, RC); + /*0x088*/bind_instr_oe(g1f_list, SUBFE, RD, RA, RB, OE, RC); + /*0x08a*/bind_instr_oe(g1f_list, ADDE, RD, RA, RB, OE, RC); /*0x090*/bind_instr(g1f_list, MTOCRF, L_11, CRM, RS); /*0x095*/bind_instr(g1f_list, STDX, RS, RA, RB); /*0x096*/bind_instr(g1f_list, STWCX_, RS, RA, RB); @@ -499,18 +502,18 @@ namespace PPU_instr /*0x0b5*/bind_instr(g1f_list, STDUX, RS, RA, RB); /*0x0b7*/bind_instr(g1f_list, STWUX, RS, RA, RB); /*0x0c7*/bind_instr(g1f_list, STVEWX, VS, RA, RB); - /*0x0c8*/bind_instr(g1f_list, SUBFZE, RD, RA, OE, RC); - /*0x0ca*/bind_instr(g1f_list, ADDZE, RD, RA, OE, RC); + /*0x0c8*/bind_instr_oe(g1f_list, SUBFZE, RD, RA, OE, RC); + /*0x0ca*/bind_instr_oe(g1f_list, ADDZE, RD, RA, OE, RC); /*0x0d6*/bind_instr(g1f_list, STDCX_, RS, RA, RB); /*0x0d7*/bind_instr(g1f_list, STBX, RS, RA, RB); /*0x0e7*/bind_instr(g1f_list, STVX, VS, RA, RB); - /*0x0e8*/bind_instr(g1f_list, SUBFME, RD, RA, OE, RC); - /*0x0e9*/bind_instr(g1f_list, MULLD, RD, RA, RB, OE, RC); - /*0x0ea*/bind_instr(g1f_list, ADDME, RD, RA, OE, RC); - /*0x0eb*/bind_instr(g1f_list, MULLW, RD, RA, RB, OE, RC); + /*0x0e8*/bind_instr_oe(g1f_list, SUBFME, RD, RA, OE, RC); + /*0x0e9*/bind_instr_oe(g1f_list, MULLD, RD, RA, RB, OE, RC); + /*0x0ea*/bind_instr_oe(g1f_list, ADDME, RD, RA, OE, RC); + /*0x0eb*/bind_instr_oe(g1f_list, MULLW, RD, RA, RB, OE, RC); /*0x0f6*/bind_instr(g1f_list, DCBTST, RA, RB, TH); /*0x0f7*/bind_instr(g1f_list, STBUX, RS, RA, RB); - /*0x10a*/bind_instr(g1f_list, ADD, RD, RA, RB, OE, RC); + /*0x10a*/bind_instr_oe(g1f_list, ADD, RD, RA, RB, OE, RC); /*0x116*/bind_instr(g1f_list, DCBT, RA, RB, TH); /*0x117*/bind_instr(g1f_list, LHZX, RD, RA, RB); /*0x11c*/bind_instr(g1f_list, EQV, RA, RS, RB, RC); @@ -531,15 +534,21 @@ namespace PPU_instr /*0x1b6*/bind_instr(g1f_list, ECOWX, RS, RA, RB); /*0x1b7*/bind_instr(g1f_list, STHUX, RS, RA, RB); /*0x1bc*/bind_instr(g1f_list, OR, RA, RS, RB, RC); - /*0x1c9*/bind_instr(g1f_list, DIVDU, RD, RA, RB, OE, RC); - /*0x1cb*/bind_instr(g1f_list, DIVWU, RD, RA, RB, OE, RC); + /*0x1c9*/bind_instr_oe(g1f_list, DIVDU, RD, RA, RB, OE, RC); + /*0x1cb*/bind_instr_oe(g1f_list, DIVWU, RD, RA, RB, OE, RC); /*0x1d3*/bind_instr(g1f_list, MTSPR, SPR, RS); /*0x1d6*///DCBI /*0x1dc*/bind_instr(g1f_list, NAND, RA, RS, RB, RC); /*0x1e7*/bind_instr(g1f_list, STVXL, VS, RA, RB); - /*0x1e9*/bind_instr(g1f_list, DIVD, RD, RA, RB, OE, RC); - /*0x1eb*/bind_instr(g1f_list, DIVW, RD, RA, RB, OE, RC); + /*0x1e9*/bind_instr_oe(g1f_list, DIVD, RD, RA, RB, OE, RC); + /*0x1eb*/bind_instr_oe(g1f_list, DIVW, RD, RA, RB, OE, RC); /*0x207*/bind_instr(g1f_list, LVLX, VD, RA, RB); + // MULH{D|DU|W|WU} don't use OE, but a real Cell accepts + // opcodes with OE=1 and Rc=0, behaving as if OE was not set. + // OE=1 and Rc=1 causes an invalid instruction exception, but + // we don't worry about that. + static const auto& MULHDUO = make_instr<0x209>(g1f_list, "MULHDUO", &PPUOpcodes::MULHDU, RD, RA, RB, RC); + static const auto& MULHWUO = make_instr<0x20b>(g1f_list, "MULHWUO", &PPUOpcodes::MULHWU, RD, RA, RB, RC); /*0x214*/bind_instr(g1f_list, LDBRX, RD, RA, RB); /*0x215*/bind_instr(g1f_list, LSWX, RD, RA, RB); /*0x216*/bind_instr(g1f_list, LWBRX, RD, RA, RB); @@ -548,11 +557,14 @@ namespace PPU_instr /*0x21b*/bind_instr(g1f_list, SRD, RA, RS, RB, RC); /*0x227*/bind_instr(g1f_list, LVRX, VD, RA, RB); /*0x237*/bind_instr(g1f_list, LFSUX, FRD, RA, RB); + static const auto& MULHDO = make_instr<0x249>(g1f_list, "MULHDO", &PPUOpcodes::MULHD, RD, RA, RB, RC); + static const auto& MULHWO = make_instr<0x24b>(g1f_list, "MULHWO", &PPUOpcodes::MULHW, RD, RA, RB, RC); /*0x255*/bind_instr(g1f_list, LSWI, RD, RA, NB); /*0x256*/bind_instr(g1f_list, SYNC, L_9_10); /*0x257*/bind_instr(g1f_list, LFDX, FRD, RA, RB); /*0x277*/bind_instr(g1f_list, LFDUX, FRD, RA, RB); /*0x287*/bind_instr(g1f_list, STVLX, VS, RA, RB); + /*0x294*/bind_instr(g1f_list, STDBRX, RD, RA, RB); /*0x296*/bind_instr(g1f_list, STSWX, RS, RA, RB); /*0x296*/bind_instr(g1f_list, STWBRX, RS, RA, RB); /*0x297*/bind_instr(g1f_list, STFSX, FRS, RA, RB); @@ -657,4 +669,4 @@ namespace PPU_instr using namespace lists; using namespace implicts; #undef bind_instr -}; \ No newline at end of file +}; diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index ba6ad22b1e..460cf632a3 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -2381,16 +2381,16 @@ private: const u64 RB = CPU.GPR[rb]; CPU.GPR[rd] = ~RA + RB + 1; CPU.XER.CA = CPU.IsCarry(~RA, RB, 1); - if(oe) throw "SUBFC(): subfco"; + if(oe) CPU.SetOV((~RA>>63 == RB>>63) && (~RA>>63 != CPU.GPR[rd]>>63)); if(rc) CPU.UpdateCR0(CPU.GPR[rd]); } void ADDC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { - const s64 RA = CPU.GPR[ra]; - const s64 RB = CPU.GPR[rb]; + const u64 RA = CPU.GPR[ra]; + const u64 RB = CPU.GPR[rb]; CPU.GPR[rd] = RA + RB; CPU.XER.CA = CPU.IsCarry(RA, RB); - if(oe) throw "ADDC(): addco"; + if(oe) CPU.SetOV((RA>>63 == RB>>63) && (RA>>63 != CPU.GPR[rd]>>63)); if(rc) CPU.UpdateCR0(CPU.GPR[rd]); } void MULHDU(u32 rd, u32 ra, u32 rb, bool rc) @@ -2526,8 +2526,10 @@ private: } void SUBF(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { - CPU.GPR[rd] = CPU.GPR[rb] - CPU.GPR[ra]; - if(oe) throw "SUBF(): subfo"; + const u64 RA = CPU.GPR[ra]; + const u64 RB = CPU.GPR[rb]; + CPU.GPR[rd] = RB - RA; + if(oe) CPU.SetOV((~RA>>63 == RB>>63) && (~RA>>63 != CPU.GPR[rd]>>63)); if(rc) CPU.UpdateCR0(CPU.GPR[rd]); } void LDUX(u32 rd, u32 ra, u32 rb) @@ -2605,8 +2607,9 @@ private: } void NEG(u32 rd, u32 ra, u32 oe, bool rc) { - CPU.GPR[rd] = 0-CPU.GPR[ra]; - if(oe) throw "NEG(): nego"; + const u64 RA = CPU.GPR[ra]; + CPU.GPR[rd] = 0 - RA; + if(oe) CPU.SetOV((~RA>>63 == 0) && (~RA>>63 != CPU.GPR[rd]>>63)); if(rc) CPU.UpdateCR0(CPU.GPR[rd]); } void LBZUX(u32 rd, u32 ra, u32 rb) @@ -2634,8 +2637,8 @@ private: const u64 RB = CPU.GPR[rb]; CPU.GPR[rd] = ~RA + RB + CPU.XER.CA; CPU.XER.CA = CPU.IsCarry(~RA, RB, CPU.XER.CA); + if(oe) CPU.SetOV((~RA>>63 == RB>>63) && (~RA>>63 != CPU.GPR[rd]>>63)); if(rc) CPU.UpdateCR0(CPU.GPR[rd]); - if(oe) throw "SUBFE(): subfeo"; } void ADDE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { @@ -2659,8 +2662,8 @@ private: CPU.GPR[rd] = RA + RB; CPU.XER.CA = CPU.IsCarry(RA, RB); } + if(oe) CPU.SetOV((RA>>63 == RB>>63) && (RA>>63 != CPU.GPR[rd]>>63)); if(rc) CPU.UpdateCR0(CPU.GPR[rd]); - if(oe) throw "ADDE(): addeo"; } void MTOCRF(u32 l, u32 crm, u32 rs) { @@ -2748,7 +2751,7 @@ private: const u64 RA = CPU.GPR[ra]; CPU.GPR[rd] = RA + CPU.XER.CA; CPU.XER.CA = CPU.IsCarry(RA, CPU.XER.CA); - if(oe) LOG_WARNING(PPU, "addzeo"); + if(oe) CPU.SetOV((RA>>63 == 0) && (RA>>63 != CPU.GPR[rd]>>63)); if(rc) CPU.UpdateCR0(CPU.GPR[rd]); } void SUBFZE(u32 rd, u32 ra, u32 oe, bool rc) @@ -2756,8 +2759,8 @@ private: const u64 RA = CPU.GPR[ra]; CPU.GPR[rd] = ~RA + CPU.XER.CA; CPU.XER.CA = CPU.IsCarry(~RA, CPU.XER.CA); - if (oe) LOG_WARNING(PPU, "subfzeo"); - if (rc) CPU.UpdateCR0(CPU.GPR[rd]); + if(oe) CPU.SetOV((~RA>>63 == 0) && (~RA>>63 != CPU.GPR[rd]>>63)); + if(rc) CPU.UpdateCR0(CPU.GPR[rd]); } void STDCX_(u32 rs, u32 ra, u32 rb) { @@ -2787,14 +2790,20 @@ private: const u64 RA = CPU.GPR[ra]; CPU.GPR[rd] = ~RA + CPU.XER.CA + ~0ULL; CPU.XER.CA = CPU.IsCarry(~RA, CPU.XER.CA, ~0ULL); - if (oe) LOG_WARNING(PPU, "subfmeo"); - if (rc) CPU.UpdateCR0(CPU.GPR[rd]); + if(oe) CPU.SetOV((~RA>>63 == 1) && (~RA>>63 != CPU.GPR[rd]>>63)); + if(rc) CPU.UpdateCR0(CPU.GPR[rd]); } void MULLD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { - CPU.GPR[rd] = (s64)((s64)CPU.GPR[ra] * (s64)CPU.GPR[rb]); + const s64 RA = CPU.GPR[ra]; + const s64 RB = CPU.GPR[rb]; + CPU.GPR[rd] = (s64)(RA * RB); + if(oe) + { + const s64 high = __mulh(RA, RB); + CPU.SetOV(high != s64(CPU.GPR[rd]) >> 63); + } if(rc) CPU.UpdateCR0(CPU.GPR[rd]); - if(oe) throw "MULLD(): mulldo"; } void ADDME(u32 rd, u32 ra, u32 oe, bool rc) { @@ -2802,14 +2811,14 @@ private: CPU.GPR[rd] = RA + CPU.XER.CA - 1; CPU.XER.CA |= RA != 0; - if(oe) throw "ADDME(): addmeo"; + if(oe) CPU.SetOV((u64(RA)>>63 == 1) && (u64(RA)>>63 != CPU.GPR[rd]>>63)); if(rc) CPU.UpdateCR0(CPU.GPR[rd]); } void MULLW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { CPU.GPR[rd] = (s64)((s64)(s32)CPU.GPR[ra] * (s64)(s32)CPU.GPR[rb]); + if(oe) CPU.SetOV(s64(CPU.GPR[rd]) < s64(-1)<<31 || s64(CPU.GPR[rd]) >= s64(1)<<31); if(rc) CPU.UpdateCR0(CPU.GPR[rd]); - if(oe) throw "MULLW(): mullwo"; } void DCBTST(u32 ra, u32 rb, u32 th) { @@ -2825,7 +2834,7 @@ private: const u64 RA = CPU.GPR[ra]; const u64 RB = CPU.GPR[rb]; CPU.GPR[rd] = RA + RB; - if(oe) throw "ADD(): addo"; + if(oe) CPU.SetOV((RA>>63 == RB>>63) && (RA>>63 != CPU.GPR[rd]>>63)); if(rc) CPU.UpdateCR0(CPU.GPR[rd]); } void DCBT(u32 ra, u32 rb, u32 th) @@ -2940,11 +2949,12 @@ private: if(RB == 0) { - if(oe) throw "DIVDU(): divduo"; + if(oe) CPU.SetOV(true); CPU.GPR[rd] = 0; } else { + if(oe) CPU.SetOV(false); CPU.GPR[rd] = RA / RB; } @@ -2957,11 +2967,12 @@ private: if(RB == 0) { - if(oe) throw "DIVWU(): divwuo"; + if(oe) CPU.SetOV(true); CPU.GPR[rd] = 0; } else { + if(oe) CPU.SetOV(false); CPU.GPR[rd] = RA / RB; } @@ -2991,11 +3002,12 @@ private: if (RB == 0 || ((u64)RA == (1ULL << 63) && RB == -1)) { - if(oe) throw "DIVD(): divdo"; + if(oe) CPU.SetOV(true); CPU.GPR[rd] = /*(((u64)RA & (1ULL << 63)) && RB == 0) ? -1 :*/ 0; } else { + if(oe) CPU.SetOV(false); CPU.GPR[rd] = RA / RB; } @@ -3008,11 +3020,12 @@ private: if (RB == 0 || ((u32)RA == (1 << 31) && RB == -1)) { - if(oe) throw "DIVW(): divwo"; + if(oe) CPU.SetOV(true); CPU.GPR[rd] = /*(((u32)RA & (1 << 31)) && RB == 0) ? -1 :*/ 0; } else { + if(oe) CPU.SetOV(false); CPU.GPR[rd] = (u32)(RA / RB); } @@ -3033,7 +3046,22 @@ private: } void LSWX(u32 rd, u32 ra, u32 rb) { - throw "LSWX()"; + u64 addr = ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]; + u32 count = CPU.XER.XER & 0x7F; + for (; count >= 4; count -= 4, addr += 4, rd = (rd+1) & 31) + { + CPU.GPR[rd] = vm::get_ref>(vm::cast(addr)); + } + if (count) + { + u32 value = 0; + for (u32 byte = 0; byte < count; byte++) + { + u32 byte_value = vm::get_ref(vm::cast(addr+byte)); + value |= byte_value << ((3^byte)*8); + } + CPU.GPR[rd] = value; + } } void LWBRX(u32 rd, u32 ra, u32 rb) { @@ -3129,9 +3157,28 @@ private: for (u32 i = 0; i < 16u - eb; ++i) vm::write8(vm::cast(addr + i), CPU.VPR[vs]._u8[15 - i]); } + void STDBRX(u32 rs, u32 ra, u32 rb) + { + const u64 addr = ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]; + vm::get_ref(vm::cast(addr)) = CPU.GPR[rs]; + } void STSWX(u32 rs, u32 ra, u32 rb) { - throw "STSWX()"; + u64 addr = ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]; + u32 count = CPU.XER.XER & 0x7F; + for (; count >= 4; count -= 4, addr += 4, rs = (rs+1) & 31) + { + vm::write32(vm::cast(addr), (u32)CPU.GPR[rs]); + } + if (count) + { + u32 value = (u32)CPU.GPR[rs]; + for (u32 byte = 0; byte < count; byte++) + { + u32 byte_value = (u8)(value >> ((3^byte)*8)); + vm::write8(vm::cast(addr+byte), byte_value); + } + } } void STWBRX(u32 rs, u32 ra, u32 rb) { @@ -3520,25 +3567,25 @@ private: } CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) throw "FDIVS(): fdivs.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FSUBS(u32 frd, u32 fra, u32 frb, bool rc) { CPU.FPR[frd] = static_cast(CPU.FPR[fra] - CPU.FPR[frb]); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) throw "FSUBS(): fsubs.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FADDS(u32 frd, u32 fra, u32 frb, bool rc) { CPU.FPR[frd] = static_cast(CPU.FPR[fra] + CPU.FPR[frb]); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) throw "FADDS(): fadds.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FSQRTS(u32 frd, u32 frb, bool rc) { CPU.FPR[frd] = static_cast(sqrt(CPU.FPR[frb])); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) throw "FSQRTS(): fsqrts.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FRES(u32 frd, u32 frb, bool rc) { @@ -3547,7 +3594,7 @@ private: CPU.SetFPSCRException(FPSCR_ZX); } CPU.FPR[frd] = static_cast(1.0 / CPU.FPR[frb]); - if(rc) throw "FRES(): fres.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FMULS(u32 frd, u32 fra, u32 frc, bool rc) { @@ -3555,31 +3602,31 @@ private: CPU.FPSCR.FI = 0; CPU.FPSCR.FR = 0; CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) throw "FMULS(): fmuls.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { CPU.FPR[frd] = static_cast(CPU.FPR[fra] * CPU.FPR[frc] + CPU.FPR[frb]); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) throw "FMADDS(): fmadds.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { CPU.FPR[frd] = static_cast(CPU.FPR[fra] * CPU.FPR[frc] - CPU.FPR[frb]); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) throw "FMSUBS(): fmsubs.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { CPU.FPR[frd] = static_cast(-(CPU.FPR[fra] * CPU.FPR[frc] - CPU.FPR[frb])); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) throw "FNMSUBS(): fnmsubs.";////CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { CPU.FPR[frd] = static_cast(-(CPU.FPR[fra] * CPU.FPR[frc] + CPU.FPR[frb])); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) throw "FNMADDS(): fnmadds.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void STD(u32 rs, u32 ra, s32 d) { @@ -3599,7 +3646,7 @@ private: if ((crbd == 29) && !CPU.FPSCR.NI) LOG_WARNING(PPU, "Non-IEEE mode enabled"); CPU.FPSCR.FPSCR |= mask; - if(rc) throw "MTFSB1(): mtfsb1."; + if(rc) CPU.UpdateCR1(); } void MCRFS(u32 crbd, u32 crbs) { @@ -3632,7 +3679,7 @@ private: if ((crbd == 29) && !CPU.FPSCR.NI) LOG_WARNING(PPU, "Non-IEEE mode disabled"); CPU.FPSCR.FPSCR &= ~mask; - if(rc) throw "MTFSB0(): mtfsb0."; + if(rc) CPU.UpdateCR1(); } void MTFSFI(u32 crfd, u32 i, bool rc) { @@ -3650,12 +3697,12 @@ private: LOG_WARNING(PPU, "Non-IEEE mode enabled"); } - if(rc) throw "MTFSFI(): mtfsfi."; + if(rc) CPU.UpdateCR1(); } void MFFS(u32 frd, bool rc) { (u64&)CPU.FPR[frd] = CPU.FPSCR.FPSCR; - if(rc) throw "MFFS(): mffs."; + if(rc) CPU.UpdateCR1(); } void MTFSF(u32 flm, u32 frb, bool rc) { @@ -3674,7 +3721,7 @@ private: else LOG_WARNING(PPU, "Non-IEEE mode enabled"); } - if(rc) throw "MTFSF(): mtfsf."; + if(rc) CPU.UpdateCR1(); } void FCMPU(u32 crfd, u32 fra, u32 frb) { @@ -3762,7 +3809,7 @@ private: } (u64&)CPU.FPR[frd] = r; - if(rc) throw "FCTIW(): fctiw."; + if(rc) CPU.UpdateCR1(); } void FCTIWZ(u32 frd, u32 frb, bool rc) { @@ -3800,7 +3847,7 @@ private: } (u64&)CPU.FPR[frd] = (u64)value; - if(rc) throw "FCTIWZ(): fctiwz."; + if(rc) CPU.UpdateCR1(); } void FDIV(u32 frd, u32 fra, u32 frb, bool rc) { @@ -3843,30 +3890,30 @@ private: CPU.FPR[frd] = res; CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) throw "FDIV(): fdiv.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FSUB(u32 frd, u32 fra, u32 frb, bool rc) { CPU.FPR[frd] = CPU.FPR[fra] - CPU.FPR[frb]; CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) throw "FSUB(): fsub.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FADD(u32 frd, u32 fra, u32 frb, bool rc) { CPU.FPR[frd] = CPU.FPR[fra] + CPU.FPR[frb]; CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) throw "FASS(): fadd.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FSQRT(u32 frd, u32 frb, bool rc) { CPU.FPR[frd] = sqrt(CPU.FPR[frb]); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) throw "FSQRT(): fsqrt.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FSEL(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { CPU.FPR[frd] = CPU.FPR[fra] >= 0.0 ? CPU.FPR[frc] : CPU.FPR[frb]; - if(rc) throw "FSEL(): fsel.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FMUL(u32 frd, u32 fra, u32 frc, bool rc) { @@ -3889,7 +3936,7 @@ private: CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); } - if(rc) throw "FMUL(): fmul.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FRSQRTE(u32 frd, u32 frb, bool rc) { @@ -3898,31 +3945,31 @@ private: CPU.SetFPSCRException(FPSCR_ZX); } CPU.FPR[frd] = 1.0 / sqrt(CPU.FPR[frb]); - if(rc) throw "FRSQRTE(): frsqrte.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { CPU.FPR[frd] = CPU.FPR[fra] * CPU.FPR[frc] - CPU.FPR[frb]; CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) throw "FMSUB(): fmsub.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { CPU.FPR[frd] = CPU.FPR[fra] * CPU.FPR[frc] + CPU.FPR[frb]; CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) throw "FMADD(): fmadd.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FNMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { CPU.FPR[frd] = -(CPU.FPR[fra] * CPU.FPR[frc] - CPU.FPR[frb]); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) throw "FNMSUB(): fnmsub.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FNMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { CPU.FPR[frd] = -(CPU.FPR[fra] * CPU.FPR[frc] + CPU.FPR[frb]); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) throw "FNMADD(): fnmadd.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FCMPO(u32 crfd, u32 fra, u32 frb) { @@ -3949,22 +3996,22 @@ private: void FNEG(u32 frd, u32 frb, bool rc) { CPU.FPR[frd] = -CPU.FPR[frb]; - if(rc) throw "FNEG(): fneg.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FMR(u32 frd, u32 frb, bool rc) { CPU.FPR[frd] = CPU.FPR[frb]; - if(rc) throw "FMR(): fmr.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FNABS(u32 frd, u32 frb, bool rc) { CPU.FPR[frd] = -fabs(CPU.FPR[frb]); - if(rc) throw "FNABS(): fnabs.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FABS(u32 frd, u32 frb, bool rc) { CPU.FPR[frd] = fabs(CPU.FPR[frb]); - if(rc) throw "FABS(): fabs.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void FCTID(u32 frd, u32 frb, bool rc) { @@ -4023,7 +4070,7 @@ private: } (u64&)CPU.FPR[frd] = r; - if(rc) throw "FCTID(): fctid."; + if(rc) CPU.UpdateCR1(); } void FCTIDZ(u32 frd, u32 frb, bool rc) { @@ -4061,7 +4108,7 @@ private: } (u64&)CPU.FPR[frd] = r; - if(rc) throw "FCTIDZ(): fctidz."; + if(rc) CPU.UpdateCR1(); } void FCFID(u32 frd, u32 frb, bool rc) { @@ -4083,7 +4130,7 @@ private: CPU.FPR[frd] = bf; CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) throw "FCFID(): fcfid.";//CPU.UpdateCR1(CPU.FPR[frd]); + if(rc) CPU.UpdateCR1(); } void UNK(const u32 code, const u32 opcode, const u32 gcode) diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 1451a80c27..8442c33d6d 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -3746,6 +3746,16 @@ void Compiler::STVLX(u32 vs, u32 ra, u32 rb) { addr_i8_ptr, vs_i8_ptr, size_i64, m_ir_builder->getInt32(1), m_ir_builder->getInt1(false)); } +void Compiler::STDBRX(u32 rs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + WriteMemory(addr_i64, GetGpr(rs), 0, false); +} + void Compiler::STSWX(u32 rs, u32 ra, u32 rb) { CompilationError("STSWX"); } diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h index 1d656cc4a1..c4bdeb6b6b 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.h +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h @@ -598,6 +598,7 @@ namespace ppu_recompiler_llvm { void LFDX(u32 frd, u32 ra, u32 rb) override; void LFDUX(u32 frd, u32 ra, u32 rb) override; void STVLX(u32 vs, u32 ra, u32 rb) override; + void STDBRX(u32 rd, u32 ra, u32 rb) override; void STSWX(u32 rs, u32 ra, u32 rb) override; void STWBRX(u32 rs, u32 ra, u32 rb) override; void STFSX(u32 frs, u32 ra, u32 rb) override; diff --git a/rpcs3/Emu/Cell/PPUOpcodes.h b/rpcs3/Emu/Cell/PPUOpcodes.h index 39b9e08aa9..aee76ba295 100644 --- a/rpcs3/Emu/Cell/PPUOpcodes.h +++ b/rpcs3/Emu/Cell/PPUOpcodes.h @@ -344,6 +344,8 @@ namespace PPU_opcodes DIVD = 0x1e9, DIVW = 0x1eb, LVLX = 0x207, //Load Vector Left Indexed + SUBFCO = 0x208, + ADDCO = 0x20a, LDBRX = 0x214, LSWX = 0x215, LWBRX = 0x216, @@ -351,21 +353,33 @@ namespace PPU_opcodes SRW = 0x218, SRD = 0x21b, LVRX = 0x227, //Load Vector Right Indexed + SUBFO = 0x228, LFSUX = 0x237, LSWI = 0x255, SYNC = 0x256, LFDX = 0x257, + NEGO = 0x268, LFDUX = 0x277, STVLX = 0x287, //Store Vector Left Indexed + SUBFEO = 0x288, + ADDEO = 0x28a, + STDBRX = 0x294, STSWX = 0x295, STWBRX = 0x296, STFSX = 0x297, STVRX = 0x2a7, //Store Vector Right Indexed STFSUX = 0x2b7, + SUBFZEO= 0x2c8, + ADDZEO = 0x2ca, STSWI = 0x2d5, STFDX = 0x2d7, //Store Floating-Point Double Indexed + SUBFMEO= 0x2e8, + MULLDO = 0x2e9, + ADDMEO = 0x2ea, + MULLWO = 0x2eb, STFDUX = 0x2f7, LVLXL = 0x307, //Load Vector Left Indexed Last + ADDO = 0x30a, LHBRX = 0x316, SRAW = 0x318, SRAD = 0x31a, @@ -380,9 +394,13 @@ namespace PPU_opcodes EXTSH = 0x39a, STVRXL = 0x3a7, //Store Vector Right Indexed Last EXTSB = 0x3ba, + DIVDUO = 0x3c9, + DIVWUO = 0x3cb, STFIWX = 0x3d7, EXTSW = 0x3da, ICBI = 0x3d6, //Instruction Cache Block Invalidate + DIVDO = 0x3e9, + DIVWO = 0x3eb, DCBZ = 0x3f6, //Data Cache Block Set to Zero }; @@ -759,6 +777,7 @@ public: virtual void LFDX(u32 frd, u32 ra, u32 rb) = 0; virtual void LFDUX(u32 frd, u32 ra, u32 rb) = 0; virtual void STVLX(u32 vs, u32 ra, u32 rb) = 0; + virtual void STDBRX(u32 rs, u32 ra, u32 rb) = 0; virtual void STSWX(u32 rs, u32 ra, u32 rb) = 0; virtual void STWBRX(u32 rs, u32 ra, u32 rb) = 0; virtual void STFSX(u32 frs, u32 ra, u32 rb) = 0; diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index c0d5eda43c..659b81443e 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -652,7 +652,7 @@ public: UpdateCRn(0, val, 0); } - template void UpdateCR1() + void UpdateCR1() { SetCR_LT(1, FPSCR.FX); SetCR_GT(1, FPSCR.FEX); @@ -670,6 +670,12 @@ public: bool IsCarry(const u64 a, const u64 b) { return (a + b) < a; } bool IsCarry(const u64 a, const u64 b, const u64 c) { return IsCarry(a, b) || IsCarry(a + b, c); } + void SetOV(const bool set) + { + XER.OV = set; + XER.SO |= set; + } + void SetFPSCRException(const FPSCR_EXP mask) { if ((FPSCR.FPSCR & mask) != mask) FPSCR.FX = 1; From 54827f7267fa00aac7204541a75d346107b67683 Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:01:16 +0900 Subject: [PATCH 02/21] Fix conditional bcctr. --- rpcs3/Emu/Cell/PPUInterpreter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 460cf632a3..bc03684860 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -2243,7 +2243,7 @@ private: } void BCCTR(u32 bo, u32 bi, u32 bh, u32 lk) { - if(bo & 0x10 || CPU.IsCR(bi) == (bo & 0x8)) + if(bo & 0x10 || CPU.IsCR(bi) == ((bo & 0x8) != 0)) { const u32 nextLR = CPU.PC + 4; CPU.SetBranch(branchTarget(0, (u32)CPU.CTR), true); From e47d21024c0c9c561fa0f3a21b3b492b110b1db5 Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:01:25 +0900 Subject: [PATCH 03/21] Fix mtcrf. --- rpcs3/Emu/Cell/PPUInterpreter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index bc03684860..6b22273d14 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -2693,7 +2693,7 @@ private: { if(crm & (1 << i)) { - CPU.SetCR(7 - i, CPU.GPR[rs] & (0xf << (i * 4))); + CPU.SetCR(7 - i, (CPU.GPR[rs] >> (i * 4)) & 0xf); } } } From ee508f85b4a78e3b0c4b8f4175e8f49135c92864 Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:01:52 +0900 Subject: [PATCH 04/21] Fix handling of VRSAVE, and remove references to nonexistent USPRG registers. --- rpcs3/Emu/Cell/PPUInterpreter.h | 20 ++++---------------- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 22 ++++++++++++---------- rpcs3/Emu/Cell/PPULLVMRecompiler.h | 8 ++++---- rpcs3/Emu/Cell/PPUThread.cpp | 2 +- rpcs3/Emu/Cell/PPUThread.h | 3 ++- 5 files changed, 23 insertions(+), 32 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 6b22273d14..cee189cf89 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -144,14 +144,8 @@ private: case 0x001: return CPU.XER.XER; case 0x008: return CPU.LR; case 0x009: return CPU.CTR; - case 0x100: - case 0x101: - case 0x102: - case 0x103: - case 0x104: - case 0x105: - case 0x106: - case 0x107: return CPU.USPRG[n - 0x100]; + case 0x100: return CPU.VRSAVE; + case 0x103: return CPU.SPRG[3]; case 0x10C: CPU.TB = get_time(); return CPU.TB; case 0x10D: CPU.TB = get_time(); return CPU.TBH; @@ -178,14 +172,8 @@ private: case 0x001: CPU.XER.XER = value; return; case 0x008: CPU.LR = value; return; case 0x009: CPU.CTR = value; return; - case 0x100: - case 0x101: - case 0x102: - case 0x103: - case 0x104: - case 0x105: - case 0x106: - case 0x107: CPU.USPRG[n - 0x100] = value; return; + case 0x100: CPU.VRSAVE = (u32)value; return; + case 0x103: throw fmt::Format("WriteSPR(0x103, 0x%llx): Write to read-only SPR", value); case 0x10C: throw fmt::Format("WriteSPR(0x10C, 0x%llx): Write to time-based SPR", value); case 0x10D: throw fmt::Format("WriteSPR(0x10D, 0x%llx): Write to time-based SPR", value); diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 8442c33d6d..c14d0a4131 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -3311,7 +3311,7 @@ void Compiler::MFSPR(u32 rd, u32 spr) { rd_i64 = GetCtr(); break; case 0x100: - rd_i64 = GetUsprg0(); + rd_i64 = GetVrsave(); break; case 0x10C: rd_i64 = Call("get_time", get_time); @@ -3503,7 +3503,7 @@ void Compiler::MTSPR(u32 spr, u32 rs) { SetCtr(rs_i64); break; case 0x100: - SetUsprg0(rs_i64); + SetVrsave(rs_i64); break; default: assert(0); @@ -5278,17 +5278,19 @@ void Compiler::SetXerSo(Value * so) { SetXer(xer_i64); } -Value * Compiler::GetUsprg0() { - auto usrpg0_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, USPRG)); - auto usprg0_i64_ptr = m_ir_builder->CreateBitCast(usrpg0_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(usprg0_i64_ptr, 8); +Value * Compiler::GetVrsave() { + auto vrsave_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VRSAVE)); + auto vrsave_i32_ptr = m_ir_builder->CreateBitCast(vrsave_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + auto val_i32 = m_ir_builder->CreateAlignedLoad(vrsave_i32_ptr, 4); + return m_ir_builder->CreateZExtOrTrunc(val_i32, m_ir_builder->getInt64Ty()); } -void Compiler::SetUsprg0(Value * val_x64) { +void Compiler::SetVrsave(Value * val_x64) { auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - auto usprg0_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, USPRG)); - auto usprg0_i64_ptr = m_ir_builder->CreateBitCast(usprg0_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); - m_ir_builder->CreateAlignedStore(val_i64, usprg0_i64_ptr, 8); + auto val_i32 = m_ir_builder->CreateZExtOrTrunc(val_i64, m_ir_builder->getInt32Ty()); + auto vrsave_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VRSAVE)); + auto vrsave_i32_ptr = m_ir_builder->CreateBitCast(vrsave_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + m_ir_builder->CreateAlignedStore(val_i32, vrsave_i32_ptr, 8); } Value * Compiler::GetFpscr() { diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h index c4bdeb6b6b..715e9f08ab 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.h +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h @@ -860,11 +860,11 @@ namespace ppu_recompiler_llvm { /// Set the SO bit of XER void SetXerSo(llvm::Value * so); - /// Get USPRG0 - llvm::Value * GetUsprg0(); + /// Get VRSAVE + llvm::Value * GetVrsave(); - /// Set USPRG0 - void SetUsprg0(llvm::Value * val_x64); + /// Set VRSAVE + void SetVrsave(llvm::Value * val_x64); /// Load FPSCR llvm::Value * GetFpscr(); diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index f84095e683..9e5588276a 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -43,7 +43,6 @@ void PPUThread::DoReset() memset(FPR, 0, sizeof(FPR)); memset(GPR, 0, sizeof(GPR)); memset(SPRG, 0, sizeof(SPRG)); - memset(USPRG, 0, sizeof(USPRG)); CR.CR = 0; LR = 0; @@ -52,6 +51,7 @@ void PPUThread::DoReset() XER.XER = 0; FPSCR.FPSCR = 0; VSCR.VSCR = 0; + VRSAVE = 0; cycle = 0; } diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index 659b81443e..f107f1db03 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -531,7 +531,8 @@ public: u64 LR; //SPR 0x008 : Link Register u64 CTR; //SPR 0x009 : Count Register - u64 USPRG[8]; //SPR 0x100 - 0x107: User-SPR General-Purpose Registers + u32 VRSAVE; //SPR 0x100: VR Save/Restore Register (32 bits) + u64 SPRG[8]; //SPR 0x110 - 0x117 : SPR General-Purpose Registers //TBR : Time-Base Registers From 6cd3310171734788013cec7956d11a00b11ad76c Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:02:11 +0900 Subject: [PATCH 05/21] Fix FPSCR manipulation instructions. --- rpcs3/Emu/Cell/PPUInterpreter.h | 36 +++++++++------------------------ rpcs3/Emu/Cell/PPUThread.h | 21 +++++++++++++++++++ 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index cee189cf89..868cb4252f 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -3631,41 +3631,23 @@ private: void MTFSB1(u32 crbd, bool rc) { u64 mask = (1ULL << (31 - crbd)); + if ((crbd >= 3 && crbd <= 6) && !(CPU.FPSCR.FPSCR & mask)) mask |= 1ULL << 31; //FPSCR.FX if ((crbd == 29) && !CPU.FPSCR.NI) LOG_WARNING(PPU, "Non-IEEE mode enabled"); - CPU.FPSCR.FPSCR |= mask; + CPU.SetFPSCR(CPU.FPSCR.FPSCR | mask); if(rc) CPU.UpdateCR1(); } void MCRFS(u32 crbd, u32 crbs) { - CPU.SetCR(crbd, (CPU.FPSCR.FPSCR >> ((7 - crbs) * 4)) & 0xf); - - switch (crbs) - { - case 0: - CPU.FPSCR.FX = CPU.FPSCR.OX = 0; - break; - case 1: - CPU.FPSCR.UX = CPU.FPSCR.ZX = CPU.FPSCR.XX = CPU.FPSCR.VXSNAN = 0; - break; - case 2: - CPU.FPSCR.VXISI = CPU.FPSCR.VXIDI = CPU.FPSCR.VXZDZ = CPU.FPSCR.VXIMZ = 0; - break; - case 3: - CPU.FPSCR.VXVC = 0; - break; - case 5: - CPU.FPSCR.VXSOFT = CPU.FPSCR.VXSQRT = CPU.FPSCR.VXCVI = 0; - break; - default: - break; - } + CPU.SetCR(crbd, (CPU.FPSCR.FPSCR >> ((7 - crbs) * 4)) & 0xf); + const u32 exceptions_mask = 0x9FF80700; + CPU.SetFPSCR(CPU.FPSCR.FPSCR & ~(exceptions_mask & 0xf << ((7 - crbs) * 4))); } void MTFSB0(u32 crbd, bool rc) { u64 mask = (1ULL << (31 - crbd)); if ((crbd == 29) && !CPU.FPSCR.NI) LOG_WARNING(PPU, "Non-IEEE mode disabled"); - CPU.FPSCR.FPSCR &= ~mask; + CPU.SetFPSCR(CPU.FPSCR.FPSCR & ~mask); if(rc) CPU.UpdateCR1(); } @@ -3675,8 +3657,7 @@ private: u32 val = (i & 0xF) << ((7 - crfd) * 4); const u32 oldNI = CPU.FPSCR.NI; - CPU.FPSCR.FPSCR &= ~mask; - CPU.FPSCR.FPSCR |= val; + CPU.SetFPSCR((CPU.FPSCR.FPSCR & ~mask) | val); if (CPU.FPSCR.NI != oldNI) { if (oldNI) @@ -3699,9 +3680,10 @@ private: { if(flm & (1 << i)) mask |= 0xf << (i * 4); } + mask &= ~0x60000000; const u32 oldNI = CPU.FPSCR.NI; - CPU.FPSCR.FPSCR = (CPU.FPSCR.FPSCR & ~mask) | ((u32&)CPU.FPR[frb] & mask); + CPU.SetFPSCR((CPU.FPSCR.FPSCR & ~mask) | ((u32&)CPU.FPR[frb] & mask)); if (CPU.FPSCR.NI != oldNI) { if (oldNI) diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index f107f1db03..5256eaa0e5 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -53,6 +53,8 @@ enum FPSCR_EXP FPSCR_VXSOFT = 0x00000400, FPSCR_VXSQRT = 0x00000200, FPSCR_VXCVI = 0x00000100, + + FPSCR_VX_ALL = FPSCR_VXSNAN | FPSCR_VXISI | FPSCR_VXIDI | FPSCR_VXZDZ | FPSCR_VXIMZ | FPSCR_VXVC | FPSCR_VXSOFT | FPSCR_VXSQRT | FPSCR_VXCVI, }; enum FPSCR_RN @@ -677,6 +679,25 @@ public: XER.SO |= set; } + void UpdateFPSCR_FEX() + { + const u32 exceptions = (FPSCR.FPSCR >> 25) & 0x1F; + const u32 enabled = (FPSCR.FPSCR >> 3) & 0x1F; + if (exceptions & enabled) FPSCR.FEX = 1; + } + + void UpdateFPSCR_VX() + { + if (FPSCR.FPSCR & FPSCR_VX_ALL) FPSCR.VX = 1; + } + + void SetFPSCR(const u32 val) + { + FPSCR.FPSCR = val & ~(FPSCR_FEX | FPSCR_VX); + UpdateFPSCR_VX(); + UpdateFPSCR_FEX(); + } + void SetFPSCRException(const FPSCR_EXP mask) { if ((FPSCR.FPSCR & mask) != mask) FPSCR.FX = 1; From 42b55c60e7766993e2e19fd958a65d4d0fc15247 Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:02:18 +0900 Subject: [PATCH 06/21] Fix fcmp. --- rpcs3/Emu/Cell/PPUThread.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index 5256eaa0e5..ebd8d4f7a1 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -702,6 +702,8 @@ public: { if ((FPSCR.FPSCR & mask) != mask) FPSCR.FX = 1; FPSCR.FPSCR |= mask; + UpdateFPSCR_VX(); + UpdateFPSCR_FEX(); } void SetFPSCR_FI(const u32 val) From b7761beee9c8f5025b1835bdc8cf360fb7270d93 Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:02:50 +0900 Subject: [PATCH 07/21] Fix lfs/stfs with SNaNs. --- rpcs3/Emu/Cell/PPUInterpreter.h | 92 ++++++++++++++++++++++++++++++--- rpcs3/Emu/Cell/PPUThread.h | 4 ++ 2 files changed, 88 insertions(+), 8 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 868cb4252f..31bddf28e0 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -3059,7 +3059,16 @@ private: void LFSX(u32 frd, u32 ra, u32 rb) { const u64 addr = ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]; - CPU.FPR[frd] = vm::get_ref>(vm::cast(addr)).value(); + float val = vm::get_ref>(vm::cast(addr)).value(); + if (!FPRdouble::IsNaN(val)) + { + CPU.FPR[frd] = val; + } + else + { + u64 bits = (u32&)val; + (u64&)CPU.FPR[frd] = (bits & 0x80000000) << 32 | 7ULL << 60 | (bits & 0x7fffffff) << 29; + } } void SRW(u32 ra, u32 rs, u32 rb, bool rc) { @@ -3120,7 +3129,16 @@ private: void LFSUX(u32 frd, u32 ra, u32 rb) { const u64 addr = CPU.GPR[ra] + CPU.GPR[rb]; - CPU.FPR[frd] = vm::get_ref>(vm::cast(addr)).value(); + float val = vm::get_ref>(vm::cast(addr)).value(); + if (!FPRdouble::IsNaN(val)) + { + CPU.FPR[frd] = val; + } + else + { + u64 bits = (u32&)val; + (u64&)CPU.FPR[frd] = (bits & 0x80000000) << 32 | 7ULL << 60 | (bits & 0x7fffffff) << 29; + } CPU.GPR[ra] = addr; } void SYNC(u32 l) @@ -3176,7 +3194,17 @@ private: void STFSX(u32 frs, u32 ra, u32 rb) { const u64 addr = ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]; - vm::get_ref>(vm::cast(addr)) = (float)CPU.FPR[frs]; + double val = CPU.FPR[frs]; + if (!FPRdouble::IsNaN(val)) + { + vm::get_ref>(vm::cast(addr)) = (float)val; + } + else + { + u64 bits = (u64&)val; + u32 bits32 = (bits>>32 & 0x80000000) | (bits>>29 & 0x7fffffff); + vm::get_ref>(vm::cast(addr)) = (float)bits32; + } } void STVRX(u32 vs, u32 ra, u32 rb) { @@ -3188,7 +3216,17 @@ private: void STFSUX(u32 frs, u32 ra, u32 rb) { const u64 addr = CPU.GPR[ra] + CPU.GPR[rb]; - vm::get_ref>(vm::cast(addr)) = (float)CPU.FPR[frs]; + double val = CPU.FPR[frs]; + if (!FPRdouble::IsNaN(val)) + { + vm::get_ref>(vm::cast(addr)) = (float)val; + } + else + { + u64 bits = (u64&)val; + u32 bits32 = (bits>>32 & 0x80000000) | (bits>>29 & 0x7fffffff); + vm::get_ref>(vm::cast(addr)) = (float)bits32; + } CPU.GPR[ra] = addr; } void STSWI(u32 rd, u32 ra, u32 nb) @@ -3459,12 +3497,30 @@ private: void LFS(u32 frd, u32 ra, s32 d) { const u64 addr = ra ? CPU.GPR[ra] + d : d; - CPU.FPR[frd] = vm::get_ref>(vm::cast(addr)).value(); + float val = vm::get_ref>(vm::cast(addr)).value(); + if (!FPRdouble::IsNaN(val)) + { + CPU.FPR[frd] = val; + } + else + { + u64 bits = (u32&)val; + (u64&)CPU.FPR[frd] = (bits & 0x80000000) << 32 | 7ULL << 60 | (bits & 0x7fffffff) << 29; + } } void LFSU(u32 frd, u32 ra, s32 ds) { const u64 addr = CPU.GPR[ra] + ds; - CPU.FPR[frd] = vm::get_ref>(vm::cast(addr)).value(); + float val = vm::get_ref>(vm::cast(addr)).value(); + if (!FPRdouble::IsNaN(val)) + { + CPU.FPR[frd] = val; + } + else + { + u64 bits = (u32&)val; + (u64&)CPU.FPR[frd] = (bits & 0x80000000) << 32 | 7ULL << 60 | (bits & 0x7fffffff) << 29; + } CPU.GPR[ra] = addr; } void LFD(u32 frd, u32 ra, s32 d) @@ -3481,12 +3537,32 @@ private: void STFS(u32 frs, u32 ra, s32 d) { const u64 addr = ra ? CPU.GPR[ra] + d : d; - vm::get_ref>(vm::cast(addr)) = (float)CPU.FPR[frs]; + double val = CPU.FPR[frs]; + if (!FPRdouble::IsNaN(val)) + { + vm::get_ref>(vm::cast(addr)) = (float)val; + } + else + { + u64 bits = (u64&)val; + u32 bits32 = (bits>>32 & 0x80000000) | (bits>>29 & 0x7fffffff); + vm::get_ref>(vm::cast(addr)) = (float)bits32; + } } void STFSU(u32 frs, u32 ra, s32 d) { const u64 addr = CPU.GPR[ra] + d; - vm::get_ref>(vm::cast(addr)) = (float)CPU.FPR[frs]; + double val = CPU.FPR[frs]; + if (!FPRdouble::IsNaN(val)) + { + vm::get_ref>(vm::cast(addr)) = (float)val; + } + else + { + u64 bits = (u64&)val; + u32 bits32 = (bits>>32 & 0x80000000) | (bits>>29 & 0x7fffffff); + vm::get_ref>(vm::cast(addr)) = (float)bits32; + } CPU.GPR[ra] = addr; } void STFD(u32 frs, u32 ra, s32 d) diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index ebd8d4f7a1..8a503151e9 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -443,18 +443,22 @@ struct PPCdouble PPCdouble() : _u64(0) { + type = UpdateType(); } PPCdouble(double val) : _double(val) { + type = UpdateType(); } PPCdouble(u64 val) : _u64(val) { + type = UpdateType(); } PPCdouble(u32 val) : _u64(val) { + type = UpdateType(); } }; From d78fe3b4d4de35dc0dd34ac8911969f36f51ea6f Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:03:02 +0900 Subject: [PATCH 08/21] Fix detection of denormals in frsp. --- rpcs3/Emu/Cell/PPUInterpreter.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 31bddf28e0..d5be7293b9 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -3795,7 +3795,10 @@ private: const double r = static_cast(b0); CPU.FPSCR.FR = fabs(r) > fabs(b); CPU.SetFPSCR_FI(b != r); - CPU.FPSCR.FPRF = PPCdouble(r).GetType(); + u32 type = PPCdouble(r).GetType(); + if (type == FPR_PN && r < ldexp(1.0, -126)) type = FPR_PD; + else if (type == FPR_NN && r > ldexp(-1.0, -126)) type = FPR_ND; + CPU.FPSCR.FPRF = type; CPU.FPR[frd] = r; } void FCTIW(u32 frd, u32 frb, bool rc) From 74cbf1877be0b3bc13a2172c8004c9a4553bd475 Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:03:16 +0900 Subject: [PATCH 09/21] Fix stwcx. and stdcx. not properly clearing the reservation address. --- rpcs3/Emu/Cell/PPUInterpreter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index d5be7293b9..9b946aaa78 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -2698,12 +2698,12 @@ private: if (CPU.R_ADDR == addr) { CPU.SetCR_EQ(0, InterlockedCompareExchange(vm::get_ptr(vm::cast(CPU.R_ADDR)), re32((u32)CPU.GPR[rs]), (u32)CPU.R_VALUE) == (u32)CPU.R_VALUE); - CPU.R_ADDR = 0; } else { CPU.SetCR_EQ(0, false); } + CPU.R_ADDR = 0; } void STWX(u32 rs, u32 ra, u32 rb) { @@ -2757,12 +2757,12 @@ private: if (CPU.R_ADDR == addr) { CPU.SetCR_EQ(0, InterlockedCompareExchange(vm::get_ptr(vm::cast(CPU.R_ADDR)), re64(CPU.GPR[rs]), CPU.R_VALUE) == CPU.R_VALUE); - CPU.R_ADDR = 0; } else { CPU.SetCR_EQ(0, false); } + CPU.R_ADDR = 0; } void STBX(u32 rs, u32 ra, u32 rb) { From bbe12bf4b1fde69baa2bbb794ad3ba02770594ef Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:03:48 +0900 Subject: [PATCH 10/21] Fix mftbu returning the wrong half of the timebase register. --- rpcs3/Emu/Cell/PPUInterpreter.h | 4 ++-- rpcs3/Emu/Cell/PPUThread.h | 11 +---------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 9b946aaa78..1cb49ac04d 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -148,7 +148,7 @@ private: case 0x103: return CPU.SPRG[3]; case 0x10C: CPU.TB = get_time(); return CPU.TB; - case 0x10D: CPU.TB = get_time(); return CPU.TBH; + case 0x10D: CPU.TB = get_time(); return CPU.TB >> 32; case 0x110: case 0x111: @@ -2884,7 +2884,7 @@ private: switch(n) { case 0x10C: CPU.GPR[rd] = CPU.TB; break; - case 0x10D: CPU.GPR[rd] = CPU.TBH; break; + case 0x10D: CPU.GPR[rd] = CPU.TB >> 32; break; default: throw fmt::Format("mftb r%d, %d", rd, spr); } } diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index 8a503151e9..1ce02422cd 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -542,16 +542,7 @@ public: u64 SPRG[8]; //SPR 0x110 - 0x117 : SPR General-Purpose Registers //TBR : Time-Base Registers - union - { - u64 TB; //TBR 0x10C - 0x10D - - struct - { - u32 TBH; - u32 TBL; - }; - }; + u64 TB; //TBR 0x10C - 0x10D u64 cycle; From 46fa6455551e03845b834c5368e61f70d954ab66 Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:04:01 +0900 Subject: [PATCH 11/21] Reduce code duplication in FPU instruction implementations. --- rpcs3/Emu/Cell/PPUInterpreter.h | 260 +++++++------------------------- 1 file changed, 57 insertions(+), 203 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 1cb49ac04d..8fec7e0b75 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -3593,64 +3593,10 @@ private: const u64 addr = ra ? CPU.GPR[ra] + ds : ds; CPU.GPR[rd] = (s64)(s32)vm::read32(vm::cast(addr)); } - void FDIVS(u32 frd, u32 fra, u32 frb, bool rc) - { - if(FPRdouble::IsNaN(CPU.FPR[fra])) - { - CPU.FPR[frd] = CPU.FPR[fra]; - } - else if(FPRdouble::IsNaN(CPU.FPR[frb])) - { - CPU.FPR[frd] = CPU.FPR[frb]; - } - else - { - if(CPU.FPR[frb] == 0.0) - { - if(CPU.FPR[fra] == 0.0) - { - CPU.FPSCR.VXZDZ = true; - CPU.FPR[frd] = FPR_NAN; - } - else - { - CPU.FPR[frd] = (float)(CPU.FPR[fra] / CPU.FPR[frb]); - } - - CPU.FPSCR.ZX = true; - } - else if(FPRdouble::IsINF(CPU.FPR[fra]) && FPRdouble::IsINF(CPU.FPR[frb])) - { - CPU.FPSCR.VXIDI = true; - CPU.FPR[frd] = FPR_NAN; - } - else - { - CPU.FPR[frd] = (float)(CPU.FPR[fra] / CPU.FPR[frb]); - } - } - - CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) CPU.UpdateCR1(); - } - void FSUBS(u32 frd, u32 fra, u32 frb, bool rc) - { - CPU.FPR[frd] = static_cast(CPU.FPR[fra] - CPU.FPR[frb]); - CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) CPU.UpdateCR1(); - } - void FADDS(u32 frd, u32 fra, u32 frb, bool rc) - { - CPU.FPR[frd] = static_cast(CPU.FPR[fra] + CPU.FPR[frb]); - CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) CPU.UpdateCR1(); - } - void FSQRTS(u32 frd, u32 frb, bool rc) - { - CPU.FPR[frd] = static_cast(sqrt(CPU.FPR[frb])); - CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) CPU.UpdateCR1(); - } + void FDIVS(u32 frd, u32 fra, u32 frb, bool rc) {FDIV(frd, fra, frb, rc, true);} + void FSUBS(u32 frd, u32 fra, u32 frb, bool rc) {FSUB(frd, fra, frb, rc, true);} + void FADDS(u32 frd, u32 fra, u32 frb, bool rc) {FADD(frd, fra, frb, rc, true);} + void FSQRTS(u32 frd, u32 frb, bool rc) {FSQRT(frd, frb, rc, true);} void FRES(u32 frd, u32 frb, bool rc) { if(CPU.FPR[frb] == 0.0) @@ -3660,38 +3606,11 @@ private: CPU.FPR[frd] = static_cast(1.0 / CPU.FPR[frb]); if(rc) CPU.UpdateCR1(); } - void FMULS(u32 frd, u32 fra, u32 frc, bool rc) - { - CPU.FPR[frd] = static_cast(CPU.FPR[fra] * CPU.FPR[frc]); - CPU.FPSCR.FI = 0; - CPU.FPSCR.FR = 0; - CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) CPU.UpdateCR1(); - } - void FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) - { - CPU.FPR[frd] = static_cast(CPU.FPR[fra] * CPU.FPR[frc] + CPU.FPR[frb]); - CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) CPU.UpdateCR1(); - } - void FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) - { - CPU.FPR[frd] = static_cast(CPU.FPR[fra] * CPU.FPR[frc] - CPU.FPR[frb]); - CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) CPU.UpdateCR1(); - } - void FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) - { - CPU.FPR[frd] = static_cast(-(CPU.FPR[fra] * CPU.FPR[frc] - CPU.FPR[frb])); - CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) CPU.UpdateCR1(); - } - void FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) - { - CPU.FPR[frd] = static_cast(-(CPU.FPR[fra] * CPU.FPR[frc] + CPU.FPR[frb])); - CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) CPU.UpdateCR1(); - } + void FMULS(u32 frd, u32 fra, u32 frc, bool rc) {FMUL(frd, fra, frc, rc, true);} + void FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {FMADD(frd, fra, frc, frb, rc, false, false, true);} + void FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {FMADD(frd, fra, frc, frb, rc, false, true, true);} + void FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {FMADD(frd, fra, frc, frb, rc, true, true, true);} + void FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {FMADD(frd, fra, frc, frb, rc, true, false, true);} void STD(u32 rs, u32 ra, s32 d) { const u64 addr = ra ? CPU.GPR[ra] + d : d; @@ -3801,7 +3720,8 @@ private: CPU.FPSCR.FPRF = type; CPU.FPR[frd] = r; } - void FCTIW(u32 frd, u32 frb, bool rc) + void FCTIW(u32 frd, u32 frb, bool rc) {FCTIW(frd, frb, rc, false);} + void FCTIW(u32 frd, u32 frb, bool rc, bool truncate) { const double b = CPU.FPR[frb]; u32 r; @@ -3822,7 +3742,8 @@ private: else { s32 i = 0; - switch(CPU.FPSCR.RN) + const u32 rn = truncate ? FPSCR_RN_ZERO : CPU.FPSCR.RN; + switch(rn) { case FPSCR_RN_NEAR: { @@ -3860,45 +3781,9 @@ private: (u64&)CPU.FPR[frd] = r; if(rc) CPU.UpdateCR1(); } - void FCTIWZ(u32 frd, u32 frb, bool rc) - { - const double b = CPU.FPR[frb]; - u32 value; - if (b > (double)0x7fffffff) - { - value = 0x7fffffff; - CPU.SetFPSCRException(FPSCR_VXCVI); - CPU.FPSCR.FI = 0; - CPU.FPSCR.FR = 0; - } - else if (b < -(double)0x80000000) - { - value = 0x80000000; - CPU.SetFPSCRException(FPSCR_VXCVI); - CPU.FPSCR.FI = 0; - CPU.FPSCR.FR = 0; - } - else - { - s32 i = (s32)b; - double di = i; - if (di == b) - { - CPU.SetFPSCR_FI(0); - CPU.FPSCR.FR = 0; - } - else - { - CPU.SetFPSCR_FI(1); - CPU.FPSCR.FR = fabs(di) > fabs(b); - } - value = (u32)i; - } - - (u64&)CPU.FPR[frd] = (u64)value; - if(rc) CPU.UpdateCR1(); - } - void FDIV(u32 frd, u32 fra, u32 frb, bool rc) + void FCTIWZ(u32 frd, u32 frb, bool rc) {FCTIW(frd, frb, rc, true);} + void FDIV(u32 frd, u32 fra, u32 frb, bool rc) {FDIV(frd, fra, frb, rc, false);} + void FDIV(u32 frd, u32 fra, u32 frb, bool rc, bool single) { double res; @@ -3937,25 +3822,35 @@ private: } } - CPU.FPR[frd] = res; + if(single) CPU.FPR[frd] = (float)res; + else CPU.FPR[frd] = res; CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); if(rc) CPU.UpdateCR1(); } - void FSUB(u32 frd, u32 fra, u32 frb, bool rc) + void FSUB(u32 frd, u32 fra, u32 frb, bool rc) {FSUB(frd, fra, frb, rc, false);} + void FSUB(u32 frd, u32 fra, u32 frb, bool rc, bool single) { - CPU.FPR[frd] = CPU.FPR[fra] - CPU.FPR[frb]; + const double res = CPU.FPR[fra] - CPU.FPR[frb]; + if(single) CPU.FPR[frd] = (float)res; + else CPU.FPR[frd] = res; CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); if(rc) CPU.UpdateCR1(); } - void FADD(u32 frd, u32 fra, u32 frb, bool rc) + void FADD(u32 frd, u32 fra, u32 frb, bool rc) {FADD(frd, fra, frb, rc, false);} + void FADD(u32 frd, u32 fra, u32 frb, bool rc, bool single) { - CPU.FPR[frd] = CPU.FPR[fra] + CPU.FPR[frb]; + const double res = CPU.FPR[fra] + CPU.FPR[frb]; + if(single) CPU.FPR[frd] = (float)res; + else CPU.FPR[frd] = res; CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); if(rc) CPU.UpdateCR1(); } - void FSQRT(u32 frd, u32 frb, bool rc) + void FSQRT(u32 frd, u32 frb, bool rc) {FSQRT(frd, frb, rc, false);} + void FSQRT(u32 frd, u32 frb, bool rc, bool single) { - CPU.FPR[frd] = sqrt(CPU.FPR[frb]); + const double res = sqrt(CPU.FPR[frb]); + if(single) CPU.FPR[frd] = (float)res; + else CPU.FPR[frd] = res; CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); if(rc) CPU.UpdateCR1(); } @@ -3964,15 +3859,16 @@ private: CPU.FPR[frd] = CPU.FPR[fra] >= 0.0 ? CPU.FPR[frc] : CPU.FPR[frb]; if(rc) CPU.UpdateCR1(); } - void FMUL(u32 frd, u32 fra, u32 frc, bool rc) + void FMUL(u32 frd, u32 fra, u32 frc, bool rc) {FMUL(frd, fra, frc, rc, false);} + void FMUL(u32 frd, u32 fra, u32 frc, bool rc, bool single) { + double res; if((FPRdouble::IsINF(CPU.FPR[fra]) && CPU.FPR[frc] == 0.0) || (FPRdouble::IsINF(CPU.FPR[frc]) && CPU.FPR[fra] == 0.0)) { CPU.SetFPSCRException(FPSCR_VXIMZ); - CPU.FPR[frd] = FPR_NAN; + res = FPR_NAN; CPU.FPSCR.FI = 0; CPU.FPSCR.FR = 0; - CPU.FPSCR.FPRF = FPR_QNAN; } else { @@ -3981,10 +3877,12 @@ private: CPU.SetFPSCRException(FPSCR_VXSNAN); } - CPU.FPR[frd] = CPU.FPR[fra] * CPU.FPR[frc]; - CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); + res = CPU.FPR[fra] * CPU.FPR[frc]; } + if(single) CPU.FPR[frd] = (float)res; + else CPU.FPR[frd] = res; + CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); if(rc) CPU.UpdateCR1(); } void FRSQRTE(u32 frd, u32 frb, bool rc) @@ -3996,30 +3894,21 @@ private: CPU.FPR[frd] = 1.0 / sqrt(CPU.FPR[frb]); if(rc) CPU.UpdateCR1(); } - void FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) + void FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {FMADD(frd, fra, frc, frb, rc, false, true, false);} + void FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {FMADD(frd, fra, frc, frb, rc, false, false, false);} + void FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc, bool neg, bool sub, bool single) { - CPU.FPR[frd] = CPU.FPR[fra] * CPU.FPR[frc] - CPU.FPR[frb]; - CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) CPU.UpdateCR1(); - } - void FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) - { - CPU.FPR[frd] = CPU.FPR[fra] * CPU.FPR[frc] + CPU.FPR[frb]; - CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) CPU.UpdateCR1(); - } - void FNMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) - { - CPU.FPR[frd] = -(CPU.FPR[fra] * CPU.FPR[frc] - CPU.FPR[frb]); - CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); - if(rc) CPU.UpdateCR1(); - } - void FNMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) - { - CPU.FPR[frd] = -(CPU.FPR[fra] * CPU.FPR[frc] + CPU.FPR[frb]); + const double a = CPU.FPR[fra]; + const double b = CPU.FPR[frb]; + const double c = CPU.FPR[frc]; + const double res = a * c + (sub ? -b : b); + if(single) CPU.FPR[frd] = (float)(neg ? -res : res); + else CPU.FPR[frd] = (neg ? -res : res); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); if(rc) CPU.UpdateCR1(); } + void FNMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {FMADD(frd, fra, frc, frb, rc, true, true, false);} + void FNMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {FMADD(frd, fra, frc, frb, rc, true, false, false);} void FCMPO(u32 crfd, u32 fra, u32 frb) { int cmp_res = FPRdouble::Cmp(CPU.FPR[fra], CPU.FPR[frb]); @@ -4062,7 +3951,8 @@ private: CPU.FPR[frd] = fabs(CPU.FPR[frb]); if(rc) CPU.UpdateCR1(); } - void FCTID(u32 frd, u32 frb, bool rc) + void FCTID(u32 frd, u32 frb, bool rc) {FCTID(frd, frb, rc, false);} + void FCTID(u32 frd, u32 frb, bool rc, bool truncate) { const double b = CPU.FPR[frb]; u64 r; @@ -4083,7 +3973,8 @@ private: else { s64 i = 0; - switch(CPU.FPSCR.RN) + const u32 rn = truncate ? FPSCR_RN_ZERO : CPU.FPSCR.RN; + switch(rn) { case FPSCR_RN_NEAR: { @@ -4121,44 +4012,7 @@ private: (u64&)CPU.FPR[frd] = r; if(rc) CPU.UpdateCR1(); } - void FCTIDZ(u32 frd, u32 frb, bool rc) - { - const double b = CPU.FPR[frb]; - u64 r; - if(b > (double)0x7fffffffffffffff) - { - r = 0x7fffffffffffffff; - CPU.SetFPSCRException(FPSCR_VXCVI); - CPU.FPSCR.FI = 0; - CPU.FPSCR.FR = 0; - } - else if (b < -(double)0x8000000000000000) - { - r = 0x8000000000000000; - CPU.SetFPSCRException(FPSCR_VXCVI); - CPU.FPSCR.FI = 0; - CPU.FPSCR.FR = 0; - } - else - { - s64 i = (s64)b; - double di = (double)i; - if (di == b) - { - CPU.SetFPSCR_FI(0); - CPU.FPSCR.FR = 0; - } - else - { - CPU.SetFPSCR_FI(1); - CPU.FPSCR.FR = fabs(di) > fabs(b); - } - r = (u64)i; - } - - (u64&)CPU.FPR[frd] = r; - if(rc) CPU.UpdateCR1(); - } + void FCTIDZ(u32 frd, u32 frb, bool rc) {FCTID(frd, frb, rc, true);} void FCFID(u32 frd, u32 frb, bool rc) { s64 bi = (s64&)CPU.FPR[frb]; From 5fd88bf801266131a32c0dbf8b53d774595b82dc Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:04:13 +0900 Subject: [PATCH 12/21] Fix invalid double constant. --- rpcs3/Emu/Cell/PPUInterpreter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 8fec7e0b75..5a25ad5346 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -3956,7 +3956,7 @@ private: { const double b = CPU.FPR[frb]; u64 r; - if(b > (double)0x7fffffffffffffff) + if(b >= (double)0x8000000000000000) { r = 0x7fffffffffffffff; CPU.SetFPSCRException(FPSCR_VXCVI); From fee20573d2155f0a5b8f7b8d9d2cc88e8f6b9da4 Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:04:22 +0900 Subject: [PATCH 13/21] Fix fcti rounding. --- rpcs3/Emu/Cell/PPUInterpreter.h | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 5a25ad5346..3a9c1b3824 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -3746,22 +3746,16 @@ private: switch(rn) { case FPSCR_RN_NEAR: - { - double t = b + 0.5; - i = (s32)t; - if (t - i < 0 || (t - i == 0 && b > 0)) i--; - break; - } + i = (s32)nearbyint(b); + break; case FPSCR_RN_ZERO: i = (s32)b; break; case FPSCR_RN_PINF: - i = (s32)b; - if (b - i > 0) i++; + i = (s32)ceil(b); break; case FPSCR_RN_MINF: - i = (s32)b; - if (b - i < 0) i--; + i = (s32)floor(b); break; } r = (u32)i; @@ -3977,22 +3971,16 @@ private: switch(rn) { case FPSCR_RN_NEAR: - { - double t = b + 0.5; - i = (s64)t; - if (t - i < 0 || (t - i == 0 && b > 0)) i--; - break; - } + i = (s64)nearbyint(b); + break; case FPSCR_RN_ZERO: i = (s64)b; break; case FPSCR_RN_PINF: - i = (s64)b; - if (b - i > 0) i++; + i = (s64)ceil(b); break; case FPSCR_RN_MINF: - i = (s64)b; - if (b - i < 0) i--; + i = (s64)floor(b); break; } r = (u64)i; From 43e74a9c61f467d145fded785c9f9724e7d25ffc Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:04:35 +0900 Subject: [PATCH 14/21] Fix intermediate rounding of fused multiply-add instructions. --- rpcs3/Emu/Cell/PPUInterpreter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 3a9c1b3824..94de3f12c3 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -3895,7 +3895,7 @@ private: const double a = CPU.FPR[fra]; const double b = CPU.FPR[frb]; const double c = CPU.FPR[frc]; - const double res = a * c + (sub ? -b : b); + const double res = fma(a, c, sub ? -b : b); if(single) CPU.FPR[frd] = (float)(neg ? -res : res); else CPU.FPR[frd] = (neg ? -res : res); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); From 14e0cbde9cf7a8dfcf44b9ede1f44b784bddb87d Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:04:45 +0900 Subject: [PATCH 15/21] Catch invalid and zero-division exceptions in FPU instructions. --- rpcs3/Emu/Cell/PPUInterpreter.h | 454 +++++++++++++++++++++++++++----- 1 file changed, 385 insertions(+), 69 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 94de3f12c3..c1ff4a8db1 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -53,6 +53,13 @@ u64 rotr64(const u64 x, const u8 n) { return (x >> n) | (x << (64 - n)); } #define rotl64 _rotl64 #define rotr64 _rotr64 +static double SilenceNaN(double x) +{ + u64 bits = (u64&)x; + bits |= 0x0008000000000000ULL; + return (double&)bits; +} + namespace ppu_recompiler_llvm { class Compiler; } @@ -3599,11 +3606,39 @@ private: void FSQRTS(u32 frd, u32 frb, bool rc) {FSQRT(frd, frb, rc, true);} void FRES(u32 frd, u32 frb, bool rc) { - if(CPU.FPR[frb] == 0.0) + const double b = CPU.FPR[frb]; + if(FPRdouble::IsSNaN(b)) + { + CPU.SetFPSCRException(FPSCR_VXSNAN); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } + } + if(FPRdouble::IsNaN(b)) + { + CPU.FPR[frd] = SilenceNaN(b); + } + else if(b == 0.0) { CPU.SetFPSCRException(FPSCR_ZX); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if (CPU.FPSCR.ZE) + { + if(rc) CPU.UpdateCR1(); + return; + } + CPU.FPR[frd] = 1.0 / b; } - CPU.FPR[frd] = static_cast(1.0 / CPU.FPR[frb]); + else + { + CPU.FPR[frd] = static_cast(1.0 / b); + } + CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); if(rc) CPU.UpdateCR1(); } void FMULS(u32 frd, u32 fra, u32 frc, bool rc) {FMUL(frd, fra, frc, rc, true);} @@ -3706,38 +3741,69 @@ private: void FRSP(u32 frd, u32 frb, bool rc) { const double b = CPU.FPR[frb]; + if (FPRdouble::IsSNaN(b)) + { + CPU.SetFPSCRException(FPSCR_VXSNAN); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if (CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } + } double b0 = b; if(CPU.FPSCR.NI) { if (((u64&)b0 & DOUBLE_EXP) < 0x3800000000000000ULL) (u64&)b0 &= DOUBLE_SIGN; } const double r = static_cast(b0); - CPU.FPSCR.FR = fabs(r) > fabs(b); - CPU.SetFPSCR_FI(b != r); + if (FPRdouble::IsNaN(r)) + { + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + } + else + { + CPU.FPSCR.FR = fabs(r) > fabs(b); + CPU.SetFPSCR_FI(b != r); + } u32 type = PPCdouble(r).GetType(); if (type == FPR_PN && r < ldexp(1.0, -126)) type = FPR_PD; else if (type == FPR_NN && r > ldexp(-1.0, -126)) type = FPR_ND; CPU.FPSCR.FPRF = type; CPU.FPR[frd] = r; + if(rc) CPU.UpdateCR1(); } void FCTIW(u32 frd, u32 frb, bool rc) {FCTIW(frd, frb, rc, false);} void FCTIW(u32 frd, u32 frb, bool rc, bool truncate) { const double b = CPU.FPR[frb]; u32 r; - if(b > (double)0x7fffffff) + if (FPRdouble::IsNaN(b) || b < -(double)0x80000000) { - r = 0x7fffffff; CPU.SetFPSCRException(FPSCR_VXCVI); + if(FPRdouble::IsSNaN(b)) CPU.SetFPSCRException(FPSCR_VXSNAN); CPU.FPSCR.FI = 0; CPU.FPSCR.FR = 0; - } - else if (b < -(double)0x80000000) - { + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } r = 0x80000000; + } + else if(b > (double)0x7fffffff) + { CPU.SetFPSCRException(FPSCR_VXCVI); CPU.FPSCR.FI = 0; CPU.FPSCR.FR = 0; + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } + r = 0x7fffffff; } else { @@ -3779,72 +3845,199 @@ private: void FDIV(u32 frd, u32 fra, u32 frb, bool rc) {FDIV(frd, fra, frb, rc, false);} void FDIV(u32 frd, u32 fra, u32 frb, bool rc, bool single) { - double res; - - if(FPRdouble::IsNaN(CPU.FPR[fra])) + const double a = CPU.FPR[fra]; + const double b = CPU.FPR[frb]; + if(FPRdouble::IsSNaN(a) || FPRdouble::IsSNaN(b)) { - res = CPU.FPR[fra]; + CPU.SetFPSCRException(FPSCR_VXSNAN); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } } - else if(FPRdouble::IsNaN(CPU.FPR[frb])) + if(FPRdouble::IsNaN(a)) { - res = CPU.FPR[frb]; + CPU.FPR[frd] = SilenceNaN(a); + } + else if(FPRdouble::IsNaN(b)) + { + CPU.FPR[frd] = SilenceNaN(b); + } + else if(a == 0.0 && b == 0.0) + { + CPU.SetFPSCRException(FPSCR_VXZDZ); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } + CPU.FPR[frd] = FPR_NAN; + } + else if(FPRdouble::IsINF(a) && FPRdouble::IsINF(b)) + { + CPU.SetFPSCRException(FPSCR_VXIDI); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } + CPU.FPR[frd] = FPR_NAN; } else { - if(CPU.FPR[frb] == 0.0) + if(b == 0.0) { - if(CPU.FPR[fra] == 0.0) - { - CPU.FPSCR.VXZDZ = 1; - res = FPR_NAN; - } - else - { - res = CPU.FPR[fra] / CPU.FPR[frb]; - } - CPU.SetFPSCRException(FPSCR_ZX); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if (CPU.FPSCR.ZE) + { + if(rc) CPU.UpdateCR1(); + return; + } } - else if(FPRdouble::IsINF(CPU.FPR[fra]) && FPRdouble::IsINF(CPU.FPR[frb])) - { - CPU.FPSCR.VXIDI = 1; - res = FPR_NAN; - } - else - { - res = CPU.FPR[fra] / CPU.FPR[frb]; - } + const double res = a / b; + if(single) CPU.FPR[frd] = (float)res; + else CPU.FPR[frd] = res; } - if(single) CPU.FPR[frd] = (float)res; - else CPU.FPR[frd] = res; CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); if(rc) CPU.UpdateCR1(); } void FSUB(u32 frd, u32 fra, u32 frb, bool rc) {FSUB(frd, fra, frb, rc, false);} void FSUB(u32 frd, u32 fra, u32 frb, bool rc, bool single) { - const double res = CPU.FPR[fra] - CPU.FPR[frb]; - if(single) CPU.FPR[frd] = (float)res; - else CPU.FPR[frd] = res; + const double a = CPU.FPR[fra]; + const double b = CPU.FPR[frb]; + if(FPRdouble::IsSNaN(a) || FPRdouble::IsSNaN(b)) + { + CPU.SetFPSCRException(FPSCR_VXSNAN); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } + } + if(FPRdouble::IsNaN(a)) + { + CPU.FPR[frd] = SilenceNaN(a); + } + else if(FPRdouble::IsNaN(b)) + { + CPU.FPR[frd] = SilenceNaN(b); + } + else if(FPRdouble::IsINF(a) && FPRdouble::IsINF(b) && a == b) + { + CPU.SetFPSCRException(FPSCR_VXISI); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } + CPU.FPR[frd] = FPR_NAN; + } + else + { + const double res = a - b; + if(single) CPU.FPR[frd] = (float)res; + else CPU.FPR[frd] = res; + } CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); if(rc) CPU.UpdateCR1(); } void FADD(u32 frd, u32 fra, u32 frb, bool rc) {FADD(frd, fra, frb, rc, false);} void FADD(u32 frd, u32 fra, u32 frb, bool rc, bool single) { - const double res = CPU.FPR[fra] + CPU.FPR[frb]; - if(single) CPU.FPR[frd] = (float)res; - else CPU.FPR[frd] = res; + const double a = CPU.FPR[fra]; + const double b = CPU.FPR[frb]; + if(FPRdouble::IsSNaN(a) || FPRdouble::IsSNaN(b)) + { + CPU.SetFPSCRException(FPSCR_VXSNAN); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } + } + if(FPRdouble::IsNaN(a)) + { + CPU.FPR[frd] = SilenceNaN(a); + } + else if(FPRdouble::IsNaN(b)) + { + CPU.FPR[frd] = SilenceNaN(b); + } + else if(FPRdouble::IsINF(a) && FPRdouble::IsINF(b) && a != b) + { + CPU.SetFPSCRException(FPSCR_VXISI); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } + CPU.FPR[frd] = FPR_NAN; + } + else + { + const double res = a + b; + if(single) CPU.FPR[frd] = (float)res; + else CPU.FPR[frd] = res; + } CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); if(rc) CPU.UpdateCR1(); } void FSQRT(u32 frd, u32 frb, bool rc) {FSQRT(frd, frb, rc, false);} void FSQRT(u32 frd, u32 frb, bool rc, bool single) { - const double res = sqrt(CPU.FPR[frb]); - if(single) CPU.FPR[frd] = (float)res; - else CPU.FPR[frd] = res; + const double b = CPU.FPR[frb]; + if(FPRdouble::IsSNaN(b)) + { + CPU.SetFPSCRException(FPSCR_VXSNAN); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } + } + if(FPRdouble::IsNaN(b)) + { + CPU.FPR[frd] = SilenceNaN(b); + } + else if(b < 0.0) + { + CPU.SetFPSCRException(FPSCR_VXSQRT); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } + CPU.FPR[frd] = FPR_NAN; + } + else + { + const double res = sqrt(b); + if(single) CPU.FPR[frd] = (float)res; + else CPU.FPR[frd] = res; + } CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); if(rc) CPU.UpdateCR1(); } @@ -3856,36 +4049,95 @@ private: void FMUL(u32 frd, u32 fra, u32 frc, bool rc) {FMUL(frd, fra, frc, rc, false);} void FMUL(u32 frd, u32 fra, u32 frc, bool rc, bool single) { - double res; - if((FPRdouble::IsINF(CPU.FPR[fra]) && CPU.FPR[frc] == 0.0) || (FPRdouble::IsINF(CPU.FPR[frc]) && CPU.FPR[fra] == 0.0)) + const double a = CPU.FPR[fra]; + const double c = CPU.FPR[frc]; + if(FPRdouble::IsSNaN(a) || FPRdouble::IsSNaN(c)) + { + CPU.SetFPSCRException(FPSCR_VXSNAN); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } + } + if(FPRdouble::IsNaN(a)) + { + CPU.FPR[frd] = SilenceNaN(a); + } + else if(FPRdouble::IsNaN(c)) + { + CPU.FPR[frd] = SilenceNaN(c); + } + else if((FPRdouble::IsINF(a) && c == 0.0) || (a == 0.0 && FPRdouble::IsINF(c))) { CPU.SetFPSCRException(FPSCR_VXIMZ); - res = FPR_NAN; - CPU.FPSCR.FI = 0; CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } + CPU.FPR[frd] = FPR_NAN; } else { - if(FPRdouble::IsSNaN(CPU.FPR[fra]) || FPRdouble::IsSNaN(CPU.FPR[frc])) - { - CPU.SetFPSCRException(FPSCR_VXSNAN); - } - - res = CPU.FPR[fra] * CPU.FPR[frc]; + const double res = a * c; + if(single) CPU.FPR[frd] = (float)res; + else CPU.FPR[frd] = res; } - - if(single) CPU.FPR[frd] = (float)res; - else CPU.FPR[frd] = res; CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); if(rc) CPU.UpdateCR1(); } void FRSQRTE(u32 frd, u32 frb, bool rc) { - if(CPU.FPR[frb] == 0.0) + const double b = CPU.FPR[frb]; + if(FPRdouble::IsSNaN(b)) + { + CPU.SetFPSCRException(FPSCR_VXSNAN); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } + } + if(FPRdouble::IsNaN(b)) + { + CPU.FPR[frd] = SilenceNaN(b); + } + else if(b < 0.0) + { + CPU.SetFPSCRException(FPSCR_VXSQRT); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } + CPU.FPR[frd] = FPR_NAN; + } + else if(b == 0.0) { CPU.SetFPSCRException(FPSCR_ZX); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if (CPU.FPSCR.ZE) + { + if(rc) CPU.UpdateCR1(); + return; + } + CPU.FPR[frd] = 1.0 / b; } - CPU.FPR[frd] = 1.0 / sqrt(CPU.FPR[frb]); + else + { + CPU.FPR[frd] = 1.0 / sqrt(b); + } + CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); if(rc) CPU.UpdateCR1(); } void FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {FMADD(frd, fra, frc, frb, rc, false, true, false);} @@ -3895,9 +4147,62 @@ private: const double a = CPU.FPR[fra]; const double b = CPU.FPR[frb]; const double c = CPU.FPR[frc]; - const double res = fma(a, c, sub ? -b : b); - if(single) CPU.FPR[frd] = (float)(neg ? -res : res); - else CPU.FPR[frd] = (neg ? -res : res); + if(FPRdouble::IsSNaN(a) || FPRdouble::IsSNaN(b) || FPRdouble::IsSNaN(c)) + { + CPU.SetFPSCRException(FPSCR_VXSNAN); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } + } + if(FPRdouble::IsNaN(a)) + { + CPU.FPR[frd] = SilenceNaN(a); + } + else if(FPRdouble::IsNaN(b)) + { + CPU.FPR[frd] = SilenceNaN(b); + } + else if(FPRdouble::IsNaN(c)) + { + CPU.FPR[frd] = SilenceNaN(c); + } + else if((FPRdouble::IsINF(a) && c == 0.0) || (a == 0.0 && FPRdouble::IsINF(c))) + { + CPU.SetFPSCRException(FPSCR_VXIMZ); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } + CPU.FPR[frd] = FPR_NAN; + } + else + { + const double res = fma(a, c, sub ? -b : b); + if(FPRdouble::IsNaN(res)) + { + CPU.SetFPSCRException(FPSCR_VXISI); + CPU.FPSCR.FR = 0; + CPU.FPSCR.FI = 0; + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } + CPU.FPR[frd] = FPR_NAN; + } + else + { + if(single) CPU.FPR[frd] = (float)(neg ? -res : res); + else CPU.FPR[frd] = (neg ? -res : res); + } + } CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); if(rc) CPU.UpdateCR1(); } @@ -3950,19 +4255,30 @@ private: { const double b = CPU.FPR[frb]; u64 r; - if(b >= (double)0x8000000000000000) + if (FPRdouble::IsNaN(b) || b < -(double)0x8000000000000000) { - r = 0x7fffffffffffffff; CPU.SetFPSCRException(FPSCR_VXCVI); + if(FPRdouble::IsSNaN(b)) CPU.SetFPSCRException(FPSCR_VXSNAN); CPU.FPSCR.FI = 0; CPU.FPSCR.FR = 0; - } - else if (b < -(double)0x8000000000000000) - { + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } r = 0x8000000000000000; + } + else if(b >= (double)0x8000000000000000) + { CPU.SetFPSCRException(FPSCR_VXCVI); CPU.FPSCR.FI = 0; CPU.FPSCR.FR = 0; + if(CPU.FPSCR.VE) + { + if(rc) CPU.UpdateCR1(); + return; + } + r = 0x7fffffffffffffff; } else { From 0fa0946a461addb8f6900b9fe59fe8ce71bcd1b4 Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:04:56 +0900 Subject: [PATCH 16/21] Fix vsumsws clobbering input registers and reading from the wrong word. --- rpcs3/Emu/Cell/PPUInterpreter.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index c1ff4a8db1..71e727fedb 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -1930,15 +1930,14 @@ private: } void VSUMSWS(u32 vd, u32 va, u32 vb) { - CPU.VPR[vd].clear(); - - s64 sum = CPU.VPR[vb]._s32[3]; + s64 sum = CPU.VPR[vb]._s32[0]; for (uint w = 0; w < 4; w++) { sum += CPU.VPR[va]._s32[w]; } + CPU.VPR[vd].clear(); if (sum > INT32_MAX) { CPU.VPR[vd]._s32[0] = (s32)INT32_MAX; From 2e2dbf6c3371454878c59a082bca7fc8c1a79931 Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:05:08 +0900 Subject: [PATCH 17/21] Reduce code duplication for vcmp instructions. --- rpcs3/Emu/Cell/PPUInterpreter.h | 180 ++++++++++---------------------- 1 file changed, 55 insertions(+), 125 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 71e727fedb..080ee0e9cd 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -448,22 +448,7 @@ private: CPU.VPR[vd]._f[w] = ((float)CPU.VPR[vb]._u32[w]) / scale; } } - void VCMPBFP(u32 vd, u32 va, u32 vb) - { - for (uint w = 0; w < 4; w++) - { - u32 mask = 0; - - const float A = CheckVSCR_NJ(CPU.VPR[va]._f[w]); - const float B = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); - - if (A > B) mask |= 1 << 31; - if (A < -B) mask |= 1 << 30; - - CPU.VPR[vd]._u32[w] = mask; - } - } - void VCMPBFP_(u32 vd, u32 va, u32 vb) + void VCMPBFP(u32 vd, u32 va, u32 vb, bool rc) { bool allInBounds = true; @@ -483,18 +468,16 @@ private: allInBounds = false; } - // Bit n°2 of CR6 - CPU.SetCR(6, 0); - CPU.SetCRBit(6, 0x2, allInBounds); - } - void VCMPEQFP(u32 vd, u32 va, u32 vb) - { - for (uint w = 0; w < 4; w++) + if (rc) { - CPU.VPR[vd]._u32[w] = CPU.VPR[va]._f[w] == CPU.VPR[vb]._f[w] ? 0xffffffff : 0; + // Bit n°2 of CR6 + CPU.SetCR(6, 0); + CPU.SetCRBit(6, 0x2, allInBounds); } } - void VCMPEQFP_(u32 vd, u32 va, u32 vb) + void VCMPBFP(u32 vd, u32 va, u32 vb) {VCMPBFP(vd, va, vb, false);} + void VCMPBFP_(u32 vd, u32 va, u32 vb) {VCMPBFP(vd, va, vb, true);} + void VCMPEQFP(u32 vd, u32 va, u32 vb, bool rc) { int all_equal = 0x8; int none_equal = 0x2; @@ -513,16 +496,11 @@ private: } } - CPU.CR.cr6 = all_equal | none_equal; + if (rc) CPU.CR.cr6 = all_equal | none_equal; } - void VCMPEQUB(u32 vd, u32 va, u32 vb) - { - for (uint b = 0; b < 16; b++) - { - CPU.VPR[vd]._u8[b] = CPU.VPR[va]._u8[b] == CPU.VPR[vb]._u8[b] ? 0xff : 0; - } - } - void VCMPEQUB_(u32 vd, u32 va, u32 vb) + void VCMPEQFP(u32 vd, u32 va, u32 vb) {VCMPEQFP(vd, va, vb, false);} + void VCMPEQFP_(u32 vd, u32 va, u32 vb) {VCMPEQFP(vd, va, vb, true);} + void VCMPEQUB(u32 vd, u32 va, u32 vb, bool rc) { int all_equal = 0x8; int none_equal = 0x2; @@ -541,16 +519,11 @@ private: } } - CPU.CR.cr6 = all_equal | none_equal; + if (rc) CPU.CR.cr6 = all_equal | none_equal; } - void VCMPEQUH(u32 vd, u32 va, u32 vb) //nf - { - for (uint h = 0; h < 8; h++) - { - CPU.VPR[vd]._u16[h] = CPU.VPR[va]._u16[h] == CPU.VPR[vb]._u16[h] ? 0xffff : 0; - } - } - void VCMPEQUH_(u32 vd, u32 va, u32 vb) //nf + void VCMPEQUB(u32 vd, u32 va, u32 vb) {VCMPEQUB(vd, va, vb, false);} + void VCMPEQUB_(u32 vd, u32 va, u32 vb) {VCMPEQUB(vd, va, vb, true);} + void VCMPEQUH(u32 vd, u32 va, u32 vb, bool rc) //nf { int all_equal = 0x8; int none_equal = 0x2; @@ -569,16 +542,11 @@ private: } } - CPU.CR.cr6 = all_equal | none_equal; + if (rc) CPU.CR.cr6 = all_equal | none_equal; } - void VCMPEQUW(u32 vd, u32 va, u32 vb) - { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[vd]._u32[w] = CPU.VPR[va]._u32[w] == CPU.VPR[vb]._u32[w] ? 0xffffffff : 0; - } - } - void VCMPEQUW_(u32 vd, u32 va, u32 vb) + void VCMPEQUH(u32 vd, u32 va, u32 vb) {VCMPEQUH(vd, va, vb, false);} + void VCMPEQUH_(u32 vd, u32 va, u32 vb) {VCMPEQUH(vd, va, vb, true);} + void VCMPEQUW(u32 vd, u32 va, u32 vb, bool rc) { int all_equal = 0x8; int none_equal = 0x2; @@ -597,16 +565,11 @@ private: } } - CPU.CR.cr6 = all_equal | none_equal; + if (rc) CPU.CR.cr6 = all_equal | none_equal; } - void VCMPGEFP(u32 vd, u32 va, u32 vb) - { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[vd]._u32[w] = CPU.VPR[va]._f[w] >= CPU.VPR[vb]._f[w] ? 0xffffffff : 0; - } - } - void VCMPGEFP_(u32 vd, u32 va, u32 vb) + void VCMPEQUW(u32 vd, u32 va, u32 vb) {VCMPEQUW(vd, va, vb, false);} + void VCMPEQUW_(u32 vd, u32 va, u32 vb) {VCMPEQUW(vd, va, vb, true);} + void VCMPGEFP(u32 vd, u32 va, u32 vb, bool rc) { int all_ge = 0x8; int none_ge = 0x2; @@ -625,16 +588,11 @@ private: } } - CPU.CR.cr6 = all_ge | none_ge; + if (rc) CPU.CR.cr6 = all_ge | none_ge; } - void VCMPGTFP(u32 vd, u32 va, u32 vb) - { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[vd]._u32[w] = CPU.VPR[va]._f[w] > CPU.VPR[vb]._f[w] ? 0xffffffff : 0; - } - } - void VCMPGTFP_(u32 vd, u32 va, u32 vb) + void VCMPGEFP(u32 vd, u32 va, u32 vb) {VCMPGEFP(vd, va, vb, false);} + void VCMPGEFP_(u32 vd, u32 va, u32 vb) {VCMPGEFP(vd, va, vb, true);} + void VCMPGTFP(u32 vd, u32 va, u32 vb, bool rc) { int all_ge = 0x8; int none_ge = 0x2; @@ -653,16 +611,11 @@ private: } } - CPU.CR.cr6 = all_ge | none_ge; + if (rc) CPU.CR.cr6 = all_ge | none_ge; } - void VCMPGTSB(u32 vd, u32 va, u32 vb) //nf - { - for (uint b = 0; b < 16; b++) - { - CPU.VPR[vd]._u8[b] = CPU.VPR[va]._s8[b] > CPU.VPR[vb]._s8[b] ? 0xff : 0; - } - } - void VCMPGTSB_(u32 vd, u32 va, u32 vb) + void VCMPGTFP(u32 vd, u32 va, u32 vb) {VCMPGTFP(vd, va, vb, false);} + void VCMPGTFP_(u32 vd, u32 va, u32 vb) {VCMPGTFP(vd, va, vb, true);} + void VCMPGTSB(u32 vd, u32 va, u32 vb, bool rc) //nf { int all_gt = 0x8; int none_gt = 0x2; @@ -681,16 +634,11 @@ private: } } - CPU.CR.cr6 = all_gt | none_gt; + if (rc) CPU.CR.cr6 = all_gt | none_gt; } - void VCMPGTSH(u32 vd, u32 va, u32 vb) - { - for (uint h = 0; h < 8; h++) - { - CPU.VPR[vd]._u16[h] = CPU.VPR[va]._s16[h] > CPU.VPR[vb]._s16[h] ? 0xffff : 0; - } - } - void VCMPGTSH_(u32 vd, u32 va, u32 vb) + void VCMPGTSB(u32 vd, u32 va, u32 vb) {VCMPGTSB(vd, va, vb, false);} + void VCMPGTSB_(u32 vd, u32 va, u32 vb) {VCMPGTSB(vd, va, vb, true);} + void VCMPGTSH(u32 vd, u32 va, u32 vb, bool rc) { int all_gt = 0x8; int none_gt = 0x2; @@ -709,16 +657,11 @@ private: } } - CPU.CR.cr6 = all_gt | none_gt; + if (rc) CPU.CR.cr6 = all_gt | none_gt; } - void VCMPGTSW(u32 vd, u32 va, u32 vb) - { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[vd]._u32[w] = CPU.VPR[va]._s32[w] > CPU.VPR[vb]._s32[w] ? 0xffffffff : 0; - } - } - void VCMPGTSW_(u32 vd, u32 va, u32 vb) + void VCMPGTSH(u32 vd, u32 va, u32 vb) {VCMPGTSH(vd, va, vb, false);} + void VCMPGTSH_(u32 vd, u32 va, u32 vb) {VCMPGTSH(vd, va, vb, true);} + void VCMPGTSW(u32 vd, u32 va, u32 vb, bool rc) { int all_gt = 0x8; int none_gt = 0x2; @@ -737,16 +680,11 @@ private: } } - CPU.CR.cr6 = all_gt | none_gt; + if (rc) CPU.CR.cr6 = all_gt | none_gt; } - void VCMPGTUB(u32 vd, u32 va, u32 vb) - { - for (uint b = 0; b < 16; b++) - { - CPU.VPR[vd]._u8[b] = CPU.VPR[va]._u8[b] > CPU.VPR[vb]._u8[b] ? 0xff : 0; - } - } - void VCMPGTUB_(u32 vd, u32 va, u32 vb) + void VCMPGTSW(u32 vd, u32 va, u32 vb) {VCMPGTSW(vd, va, vb, false);} + void VCMPGTSW_(u32 vd, u32 va, u32 vb) {VCMPGTSW(vd, va, vb, true);} + void VCMPGTUB(u32 vd, u32 va, u32 vb, bool rc) { int all_gt = 0x8; int none_gt = 0x2; @@ -765,16 +703,11 @@ private: } } - CPU.CR.cr6 = all_gt | none_gt; + if (rc) CPU.CR.cr6 = all_gt | none_gt; } - void VCMPGTUH(u32 vd, u32 va, u32 vb) - { - for (uint h = 0; h < 8; h++) - { - CPU.VPR[vd]._u16[h] = CPU.VPR[va]._u16[h] > CPU.VPR[vb]._u16[h] ? 0xffff : 0; - } - } - void VCMPGTUH_(u32 vd, u32 va, u32 vb) + void VCMPGTUB(u32 vd, u32 va, u32 vb) {VCMPGTUB(vd, va, vb, false);} + void VCMPGTUB_(u32 vd, u32 va, u32 vb) {VCMPGTUB(vd, va, vb, true);} + void VCMPGTUH(u32 vd, u32 va, u32 vb, bool rc) { int all_gt = 0x8; int none_gt = 0x2; @@ -793,16 +726,11 @@ private: } } - CPU.CR.cr6 = all_gt | none_gt; + if (rc) CPU.CR.cr6 = all_gt | none_gt; } - void VCMPGTUW(u32 vd, u32 va, u32 vb) - { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[vd]._u32[w] = CPU.VPR[va]._u32[w] > CPU.VPR[vb]._u32[w] ? 0xffffffff : 0; - } - } - void VCMPGTUW_(u32 vd, u32 va, u32 vb) + void VCMPGTUH(u32 vd, u32 va, u32 vb) {VCMPGTUH(vd, va, vb, false);} + void VCMPGTUH_(u32 vd, u32 va, u32 vb) {VCMPGTUH(vd, va, vb, true);} + void VCMPGTUW(u32 vd, u32 va, u32 vb, bool rc) { int all_gt = 0x8; int none_gt = 0x2; @@ -821,8 +749,10 @@ private: } } - CPU.CR.cr6 = all_gt | none_gt; + if (rc) CPU.CR.cr6 = all_gt | none_gt; } + void VCMPGTUW(u32 vd, u32 va, u32 vb) {VCMPGTUW(vd, va, vb, false);} + void VCMPGTUW_(u32 vd, u32 va, u32 vb) {VCMPGTUW(vd, va, vb, true);} void VCTSXS(u32 vd, u32 uimm5, u32 vb) { u32 nScale = 1 << uimm5; From 10c706d9a5fa84fa245daffe53e756c2017fb26a Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:05:18 +0900 Subject: [PATCH 18/21] Handle NaNs correctly in vector FP instructions. --- rpcs3/Emu/Cell/PPUInterpreter.h | 184 +++++++++++++++++++++++++------- 1 file changed, 148 insertions(+), 36 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 080ee0e9cd..03fd59d3a2 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -247,7 +247,16 @@ private: { for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._f[w] = CPU.VPR[va]._f[w] + CPU.VPR[vb]._f[w]; + const float a = CPU.VPR[va]._f[w]; + const float b = CPU.VPR[vb]._f[w]; + if (std::isnan(a)) + CPU.VPR[vd]._f[w] = SilenceNaN(a); + else if (std::isnan(b)) + CPU.VPR[vd]._f[w] = SilenceNaN(b); + else if (std::isinf(a) && std::isinf(b) && a != b) + CPU.VPR[vd]._f[w] = (float)FPR_NAN; + else + CPU.VPR[vd]._f[w] = a + b; } } void VADDSBS(u32 vd, u32 va, u32 vb) //nf @@ -759,20 +768,27 @@ private: for (uint w = 0; w < 4; w++) { - double result = (double)CPU.VPR[vb]._f[w] * nScale; - - if (result > 0x7fffffff) + const float b = CPU.VPR[vb]._f[w]; + if (std::isnan(b)) { - CPU.VPR[vd]._s32[w] = (int)0x7fffffff; - CPU.VSCR.SAT = 1; + CPU.VPR[vd]._s32[w] = 0; } - else if (result < -pow(2, 31)) + else { - CPU.VPR[vd]._s32[w] = (int)0x80000000; - CPU.VSCR.SAT = 1; + double result = (double)b * nScale; + if (result > 0x7fffffff) + { + CPU.VPR[vd]._s32[w] = (int)0x7fffffff; + CPU.VSCR.SAT = 1; + } + else if (result < -pow(2, 31)) + { + CPU.VPR[vd]._s32[w] = (int)0x80000000; + CPU.VSCR.SAT = 1; + } + else // C rounding = Round towards 0 + CPU.VPR[vd]._s32[w] = (int)result; } - else // C rounding = Round towards 0 - CPU.VPR[vd]._s32[w] = (int)result; } } void VCTUXS(u32 vd, u32 uimm5, u32 vb) @@ -781,21 +797,28 @@ private: for (uint w = 0; w < 4; w++) { - // C rounding = Round towards 0 - double result = (double)CPU.VPR[vb]._f[w] * nScale; - - if (result > 0xffffffffu) + const float b = CPU.VPR[vb]._f[w]; + if (std::isnan(b)) { - CPU.VPR[vd]._u32[w] = 0xffffffffu; - CPU.VSCR.SAT = 1; - } - else if (result < 0) - { - CPU.VPR[vd]._u32[w] = 0; - CPU.VSCR.SAT = 1; + CPU.VPR[vd]._s32[w] = 0; } else - CPU.VPR[vd]._u32[w] = (u32)result; + { + // C rounding = Round towards 0 + double result = (double)b * nScale; + if (result > 0xffffffffu) + { + CPU.VPR[vd]._u32[w] = 0xffffffffu; + CPU.VSCR.SAT = 1; + } + else if (result < 0) + { + CPU.VPR[vd]._u32[w] = 0; + CPU.VSCR.SAT = 1; + } + else + CPU.VPR[vd]._u32[w] = (u32)result; + } } } void VEXPTEFP(u32 vd, u32 vb) @@ -805,7 +828,11 @@ private: // and between different executions on the same implementation. for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._f[w] = powf(2.0f, CPU.VPR[vb]._f[w]); + const float b = CPU.VPR[vb]._f[w]; + if (std::isnan(b)) + CPU.VPR[vd]._f[w] = SilenceNaN(b); + else + CPU.VPR[vd]._f[w] = powf(2.0f, b); } } void VLOGEFP(u32 vd, u32 vb) @@ -814,21 +841,48 @@ private: // and between different executions on the same implementation. for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._f[w] = log2f(CPU.VPR[vb]._f[w]); + const float b = CPU.VPR[vb]._f[w]; + if (std::isnan(b)) + CPU.VPR[vd]._f[w] = SilenceNaN(b); + else + CPU.VPR[vd]._f[w] = log2f(b); } } void VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) { for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._f[w] = CPU.VPR[va]._f[w] * CPU.VPR[vc]._f[w] + CPU.VPR[vb]._f[w]; + const float a = CPU.VPR[va]._f[w]; + const float b = CPU.VPR[vb]._f[w]; + const float c = CPU.VPR[vc]._f[w]; + if (std::isnan(a)) + CPU.VPR[vd]._f[w] = SilenceNaN(a); + else if (std::isnan(b)) + CPU.VPR[vd]._f[w] = SilenceNaN(b); + else if (std::isnan(c)) + CPU.VPR[vd]._f[w] = SilenceNaN(c); + else if ((std::isinf(a) && c == 0) || (a == 0 && std::isinf(c))) + CPU.VPR[vd]._f[w] = (float)FPR_NAN; + else + { + CPU.VPR[vd]._f[w] = a * c + b; + if (std::isnan(CPU.VPR[vd]._f[w])) + CPU.VPR[vd]._f[w] = (float)FPR_NAN; + } } } void VMAXFP(u32 vd, u32 va, u32 vb) { for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._f[w] = std::max(CPU.VPR[va]._f[w], CPU.VPR[vb]._f[w]); + const float a = CPU.VPR[va]._f[w]; + const float b = CPU.VPR[vb]._f[w]; + if (std::isnan(a)) + CPU.VPR[vd]._f[w] = SilenceNaN(a); + else if (std::isnan(b)) + CPU.VPR[vd]._f[w] = SilenceNaN(b); + else + CPU.VPR[vd]._f[w] = std::max(a, b); } } void VMAXSB(u32 vd, u32 va, u32 vb) //nf @@ -915,7 +969,14 @@ private: { for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._f[w] = std::min(CPU.VPR[va]._f[w], CPU.VPR[vb]._f[w]); + const float a = CPU.VPR[va]._f[w]; + const float b = CPU.VPR[vb]._f[w]; + if (std::isnan(a)) + CPU.VPR[vd]._f[w] = SilenceNaN(a); + else if (std::isnan(b)) + CPU.VPR[vd]._f[w] = SilenceNaN(b); + else + CPU.VPR[vd]._f[w] = std::min(a, b); } } void VMINSB(u32 vd, u32 va, u32 vb) //nf @@ -1202,7 +1263,23 @@ private: { for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._f[w] = -(CPU.VPR[va]._f[w] * CPU.VPR[vc]._f[w] - CPU.VPR[vb]._f[w]); + const float a = CPU.VPR[va]._f[w]; + const float b = CPU.VPR[vb]._f[w]; + const float c = CPU.VPR[vc]._f[w]; + if (std::isnan(a)) + CPU.VPR[vd]._f[w] = SilenceNaN(a); + else if (std::isnan(b)) + CPU.VPR[vd]._f[w] = SilenceNaN(b); + else if (std::isnan(c)) + CPU.VPR[vd]._f[w] = SilenceNaN(c); + else if ((std::isinf(a) && c == 0) || (a == 0 && std::isinf(c))) + CPU.VPR[vd]._f[w] = (float)FPR_NAN; + else + { + CPU.VPR[vd]._f[w] = -(a * c - b); + if (std::isnan(CPU.VPR[vd]._f[w])) + CPU.VPR[vd]._f[w] = (float)FPR_NAN; + } } } void VNOR(u32 vd, u32 va, u32 vb) @@ -1477,35 +1554,55 @@ private: { for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._f[w] = 1.0f / CPU.VPR[vb]._f[w]; + const float b = CPU.VPR[vb]._f[w]; + if (std::isnan(b)) + CPU.VPR[vd]._f[w] = SilenceNaN(b); + else + CPU.VPR[vd]._f[w] = 1.0f / b; } } void VRFIM(u32 vd, u32 vb) { for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._f[w] = floorf(CPU.VPR[vb]._f[w]); + const float b = CPU.VPR[vb]._f[w]; + if (std::isnan(b)) + CPU.VPR[vd]._f[w] = SilenceNaN(b); + else + CPU.VPR[vd]._f[w] = floorf(CPU.VPR[vb]._f[w]); } } void VRFIN(u32 vd, u32 vb) { for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._f[w] = nearbyintf(CPU.VPR[vb]._f[w]); + const float b = CPU.VPR[vb]._f[w]; + if (std::isnan(b)) + CPU.VPR[vd]._f[w] = SilenceNaN(b); + else + CPU.VPR[vd]._f[w] = nearbyintf(CPU.VPR[vb]._f[w]); } } void VRFIP(u32 vd, u32 vb) { for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._f[w] = ceilf(CPU.VPR[vb]._f[w]); + const float b = CPU.VPR[vb]._f[w]; + if (std::isnan(b)) + CPU.VPR[vd]._f[w] = SilenceNaN(b); + else + CPU.VPR[vd]._f[w] = ceilf(CPU.VPR[vb]._f[w]); } } void VRFIZ(u32 vd, u32 vb) { for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._f[w] = truncf(CPU.VPR[vb]._f[w]); + const float b = CPU.VPR[vb]._f[w]; + if (std::isnan(b)) + CPU.VPR[vd]._f[w] = SilenceNaN(b); + else + CPU.VPR[vd]._f[w] = truncf(CPU.VPR[vb]._f[w]); } } void VRLB(u32 vd, u32 va, u32 vb) //nf @@ -1536,7 +1633,13 @@ private: for (uint w = 0; w < 4; w++) { //TODO: accurate div - CPU.VPR[vd]._f[w] = 1.0f / sqrtf(CPU.VPR[vb]._f[w]); + const float b = CPU.VPR[vb]._f[w]; + if (std::isnan(b)) + CPU.VPR[vd]._f[w] = SilenceNaN(b); + else if (b < 0) + CPU.VPR[vd]._f[w] = (float)FPR_NAN; + else + CPU.VPR[vd]._f[w] = 1.0f / sqrtf(b); } } void VSEL(u32 vd, u32 va, u32 vb, u32 vc) @@ -1729,7 +1832,16 @@ private: { for (uint w = 0; w < 4; w++) { - CPU.VPR[vd]._f[w] = CPU.VPR[va]._f[w] - CPU.VPR[vb]._f[w]; + const float a = CPU.VPR[va]._f[w]; + const float b = CPU.VPR[vb]._f[w]; + if (std::isnan(a)) + CPU.VPR[vd]._f[w] = SilenceNaN(a); + else if (std::isnan(b)) + CPU.VPR[vd]._f[w] = SilenceNaN(b); + else if (std::isinf(a) && std::isinf(b) && a == b) + CPU.VPR[vd]._f[w] = (float)FPR_NAN; + else + CPU.VPR[vd]._f[w] = a - b; } } void VSUBSBS(u32 vd, u32 va, u32 vb) //nf From 3eb3f66e96ce975e7a87a66ffcbdc85618464386 Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:05:30 +0900 Subject: [PATCH 19/21] Fix vmaxfp/vminfp with mixed signs of zero. --- rpcs3/Emu/Cell/PPUInterpreter.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 03fd59d3a2..ed235ed14d 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -463,13 +463,13 @@ private: for (uint w = 0; w < 4; w++) { - u32 mask = 0; + u32 mask = 1<<31 | 1<<30; const float A = CheckVSCR_NJ(CPU.VPR[va]._f[w]); const float B = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); - if (A > B) mask |= 1 << 31; - if (A < -B) mask |= 1 << 30; + if (A <= B) mask &= ~(1 << 31); + if (A >= -B) mask &= ~(1 << 30); CPU.VPR[vd]._u32[w] = mask; @@ -881,8 +881,14 @@ private: CPU.VPR[vd]._f[w] = SilenceNaN(a); else if (std::isnan(b)) CPU.VPR[vd]._f[w] = SilenceNaN(b); + else if (a > b) + CPU.VPR[vd]._f[w] = a; + else if (b > a) + CPU.VPR[vd]._f[w] = b; + else if (CPU.VPR[vb]._u32[w] == 0x80000000) + CPU.VPR[vd]._f[w] = a; // max(+0,-0) = +0 else - CPU.VPR[vd]._f[w] = std::max(a, b); + CPU.VPR[vd]._f[w] = b; } } void VMAXSB(u32 vd, u32 va, u32 vb) //nf @@ -975,8 +981,14 @@ private: CPU.VPR[vd]._f[w] = SilenceNaN(a); else if (std::isnan(b)) CPU.VPR[vd]._f[w] = SilenceNaN(b); + else if (a < b) + CPU.VPR[vd]._f[w] = a; + else if (b < a) + CPU.VPR[vd]._f[w] = b; + else if (CPU.VPR[vb]._u32[w] == 0x00000000) + CPU.VPR[vd]._f[w] = a; // min(-0,+0) = -0 else - CPU.VPR[vd]._f[w] = std::min(a, b); + CPU.VPR[vd]._f[w] = b; } } void VMINSB(u32 vd, u32 va, u32 vb) //nf From 9a310c312e63f227820611e87d841136d666a149 Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:05:38 +0900 Subject: [PATCH 20/21] Fix intermediate truncation in vector FP fused multiply-add. --- rpcs3/Emu/Cell/PPUInterpreter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index ed235ed14d..ecf0f5f4c5 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -865,7 +865,7 @@ private: CPU.VPR[vd]._f[w] = (float)FPR_NAN; else { - CPU.VPR[vd]._f[w] = a * c + b; + CPU.VPR[vd]._f[w] = fmaf(a, c, b); if (std::isnan(CPU.VPR[vd]._f[w])) CPU.VPR[vd]._f[w] = (float)FPR_NAN; } @@ -1288,7 +1288,7 @@ private: CPU.VPR[vd]._f[w] = (float)FPR_NAN; else { - CPU.VPR[vd]._f[w] = -(a * c - b); + CPU.VPR[vd]._f[w] = -fmaf(a, c, -b); if (std::isnan(CPU.VPR[vd]._f[w])) CPU.VPR[vd]._f[w] = (float)FPR_NAN; } From c9df9c93ed3e1f7093f49eb64eddbf3d93879e3e Mon Sep 17 00:00:00 2001 From: Andrew Church Date: Sun, 18 Jan 2015 07:05:48 +0900 Subject: [PATCH 21/21] Implement VSCR_NJ for all vector FP instructions. --- rpcs3/Emu/Cell/PPUInterpreter.h | 80 +++++++++++++++++---------------- 1 file changed, 42 insertions(+), 38 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index ecf0f5f4c5..070de70f58 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -247,8 +247,8 @@ private: { for (uint w = 0; w < 4; w++) { - const float a = CPU.VPR[va]._f[w]; - const float b = CPU.VPR[vb]._f[w]; + const float a = CheckVSCR_NJ(CPU.VPR[va]._f[w]); + const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); if (std::isnan(a)) CPU.VPR[vd]._f[w] = SilenceNaN(a); else if (std::isnan(b)) @@ -256,7 +256,7 @@ private: else if (std::isinf(a) && std::isinf(b) && a != b) CPU.VPR[vd]._f[w] = (float)FPR_NAN; else - CPU.VPR[vd]._f[w] = a + b; + CPU.VPR[vd]._f[w] = CheckVSCR_NJ(a + b); } } void VADDSBS(u32 vd, u32 va, u32 vb) //nf @@ -465,11 +465,11 @@ private: { u32 mask = 1<<31 | 1<<30; - const float A = CheckVSCR_NJ(CPU.VPR[va]._f[w]); - const float B = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); + const float a = CheckVSCR_NJ(CPU.VPR[va]._f[w]); + const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); - if (A <= B) mask &= ~(1 << 31); - if (A >= -B) mask &= ~(1 << 30); + if (a <= b) mask &= ~(1 << 31); + if (a >= -b) mask &= ~(1 << 30); CPU.VPR[vd]._u32[w] = mask; @@ -768,7 +768,7 @@ private: for (uint w = 0; w < 4; w++) { - const float b = CPU.VPR[vb]._f[w]; + const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); if (std::isnan(b)) { CPU.VPR[vd]._s32[w] = 0; @@ -797,7 +797,7 @@ private: for (uint w = 0; w < 4; w++) { - const float b = CPU.VPR[vb]._f[w]; + const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); if (std::isnan(b)) { CPU.VPR[vd]._s32[w] = 0; @@ -828,11 +828,11 @@ private: // and between different executions on the same implementation. for (uint w = 0; w < 4; w++) { - const float b = CPU.VPR[vb]._f[w]; + const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); if (std::isnan(b)) CPU.VPR[vd]._f[w] = SilenceNaN(b); else - CPU.VPR[vd]._f[w] = powf(2.0f, b); + CPU.VPR[vd]._f[w] = CheckVSCR_NJ(powf(2.0f, b)); } } void VLOGEFP(u32 vd, u32 vb) @@ -841,20 +841,20 @@ private: // and between different executions on the same implementation. for (uint w = 0; w < 4; w++) { - const float b = CPU.VPR[vb]._f[w]; + const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); if (std::isnan(b)) CPU.VPR[vd]._f[w] = SilenceNaN(b); else - CPU.VPR[vd]._f[w] = log2f(b); + CPU.VPR[vd]._f[w] = log2f(b); // Can never be denormal. } } void VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) { for (uint w = 0; w < 4; w++) { - const float a = CPU.VPR[va]._f[w]; - const float b = CPU.VPR[vb]._f[w]; - const float c = CPU.VPR[vc]._f[w]; + const float a = CheckVSCR_NJ(CPU.VPR[va]._f[w]); + const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); + const float c = CheckVSCR_NJ(CPU.VPR[vc]._f[w]); if (std::isnan(a)) CPU.VPR[vd]._f[w] = SilenceNaN(a); else if (std::isnan(b)) @@ -865,9 +865,11 @@ private: CPU.VPR[vd]._f[w] = (float)FPR_NAN; else { - CPU.VPR[vd]._f[w] = fmaf(a, c, b); - if (std::isnan(CPU.VPR[vd]._f[w])) + const float result = fmaf(a, c, b); + if (std::isnan(result)) CPU.VPR[vd]._f[w] = (float)FPR_NAN; + else + CPU.VPR[vd]._f[w] = CheckVSCR_NJ(result); } } } @@ -875,8 +877,8 @@ private: { for (uint w = 0; w < 4; w++) { - const float a = CPU.VPR[va]._f[w]; - const float b = CPU.VPR[vb]._f[w]; + const float a = CheckVSCR_NJ(CPU.VPR[va]._f[w]); + const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); if (std::isnan(a)) CPU.VPR[vd]._f[w] = SilenceNaN(a); else if (std::isnan(b)) @@ -975,8 +977,8 @@ private: { for (uint w = 0; w < 4; w++) { - const float a = CPU.VPR[va]._f[w]; - const float b = CPU.VPR[vb]._f[w]; + const float a = CheckVSCR_NJ(CPU.VPR[va]._f[w]); + const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); if (std::isnan(a)) CPU.VPR[vd]._f[w] = SilenceNaN(a); else if (std::isnan(b)) @@ -1275,9 +1277,9 @@ private: { for (uint w = 0; w < 4; w++) { - const float a = CPU.VPR[va]._f[w]; - const float b = CPU.VPR[vb]._f[w]; - const float c = CPU.VPR[vc]._f[w]; + const float a = CheckVSCR_NJ(CPU.VPR[va]._f[w]); + const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); + const float c = CheckVSCR_NJ(CPU.VPR[vc]._f[w]); if (std::isnan(a)) CPU.VPR[vd]._f[w] = SilenceNaN(a); else if (std::isnan(b)) @@ -1288,9 +1290,11 @@ private: CPU.VPR[vd]._f[w] = (float)FPR_NAN; else { - CPU.VPR[vd]._f[w] = -fmaf(a, c, -b); - if (std::isnan(CPU.VPR[vd]._f[w])) + const float result = -fmaf(a, c, -b); + if (std::isnan(result)) CPU.VPR[vd]._f[w] = (float)FPR_NAN; + else + CPU.VPR[vd]._f[w] = CheckVSCR_NJ(result); } } } @@ -1566,18 +1570,18 @@ private: { for (uint w = 0; w < 4; w++) { - const float b = CPU.VPR[vb]._f[w]; + const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); if (std::isnan(b)) CPU.VPR[vd]._f[w] = SilenceNaN(b); else - CPU.VPR[vd]._f[w] = 1.0f / b; + CPU.VPR[vd]._f[w] = CheckVSCR_NJ(1.0f / b); } } void VRFIM(u32 vd, u32 vb) { for (uint w = 0; w < 4; w++) { - const float b = CPU.VPR[vb]._f[w]; + const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); if (std::isnan(b)) CPU.VPR[vd]._f[w] = SilenceNaN(b); else @@ -1588,7 +1592,7 @@ private: { for (uint w = 0; w < 4; w++) { - const float b = CPU.VPR[vb]._f[w]; + const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); if (std::isnan(b)) CPU.VPR[vd]._f[w] = SilenceNaN(b); else @@ -1599,7 +1603,7 @@ private: { for (uint w = 0; w < 4; w++) { - const float b = CPU.VPR[vb]._f[w]; + const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); if (std::isnan(b)) CPU.VPR[vd]._f[w] = SilenceNaN(b); else @@ -1610,7 +1614,7 @@ private: { for (uint w = 0; w < 4; w++) { - const float b = CPU.VPR[vb]._f[w]; + const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); if (std::isnan(b)) CPU.VPR[vd]._f[w] = SilenceNaN(b); else @@ -1645,13 +1649,13 @@ private: for (uint w = 0; w < 4; w++) { //TODO: accurate div - const float b = CPU.VPR[vb]._f[w]; + const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); if (std::isnan(b)) CPU.VPR[vd]._f[w] = SilenceNaN(b); else if (b < 0) CPU.VPR[vd]._f[w] = (float)FPR_NAN; else - CPU.VPR[vd]._f[w] = 1.0f / sqrtf(b); + CPU.VPR[vd]._f[w] = 1.0f / sqrtf(b); // Can never be denormal. } } void VSEL(u32 vd, u32 va, u32 vb, u32 vc) @@ -1844,8 +1848,8 @@ private: { for (uint w = 0; w < 4; w++) { - const float a = CPU.VPR[va]._f[w]; - const float b = CPU.VPR[vb]._f[w]; + const float a = CheckVSCR_NJ(CPU.VPR[va]._f[w]); + const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); if (std::isnan(a)) CPU.VPR[vd]._f[w] = SilenceNaN(a); else if (std::isnan(b)) @@ -1853,7 +1857,7 @@ private: else if (std::isinf(a) && std::isinf(b) && a == b) CPU.VPR[vd]._f[w] = (float)FPR_NAN; else - CPU.VPR[vd]._f[w] = a - b; + CPU.VPR[vd]._f[w] = CheckVSCR_NJ(a - b); } } void VSUBSBS(u32 vd, u32 va, u32 vb) //nf