From a9d0ffcac1b4ad33f2f2c99c40287a43c0b1eece Mon Sep 17 00:00:00 2001
From: Malcolm Jestadt
Date: Sun, 12 Jul 2020 18:21:23 -0400
Subject: [PATCH] SPU LLVM: Avoid additional endian swapping

- Avoid additional endian swapping with the ROTQBY and ROTQBYBI instructions
- ROTQBYI is left out intentionally, since it caused worse codegen
---
Reviewer notes (commentary only; not part of the commit message):

- This copy of the patch had been flattened onto two lines. The line
  breaks below were restored so that every hunk matches the counts in
  its @@ header and the totals match the diffstat (53 insertions,
  9 deletions).
- The flattened copy had also lost every angle-bracketed span (a bare
  `template` with no parameter list, `std::forward(a)`, `match()`,
  `build(...)`). `<typename TA>` and `<TA>` follow from the `TA&& a`
  parameter. The `<u8[16]>`, `<s8[16]>` and `match_vr<...>` arguments,
  and the tab indentation of every line, are reconstructed assumptions.
  TODO: confirm against the rpcs3 tree before applying, especially on
  context and removed lines, which must match the target file exactly.
- The From: line carries no e-mail address; presumably it was lost the
  same way. It has not been restored.
- In both new fast paths the `v0` binding is never used; `as` is
  recomputed with byteswap(a) instead.
- The last two hunks add the same all_bytes_equiv loop twice, and the
  first iteration of that loop compares data._u8[0] with itself.

 rpcs3/Emu/Cell/SPURecompiler.cpp | 62 +++++++++++++++++++++++++++-----
 1 file changed, 53 insertions(+), 9 deletions(-)

diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp
index 661d1470a9..20fc412b19 100644
--- a/rpcs3/Emu/Cell/SPURecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPURecompiler.cpp
@@ -6435,11 +6435,29 @@ public:
 		set_vr(op.rt, sext<s8[16]>(m));
 	}
 
+	template <typename TA>
+	static auto byteswap(TA&& a)
+	{
+		return zshuffle(std::forward<TA>(a), 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+	}
+
 	void ROTQBYBI(spu_opcode_t op)
 	{
+		const auto a = get_vr<u8[16]>(op.ra);
+
+		// Data with swapped endian from a load instruction
+		if (auto [ok, v0] = match_expr(a, byteswap(match<u8[16]>())); ok)
+		{
+			const auto as = byteswap(a);
+			const auto sc = build<u8[16]>(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+			const auto sh = (sc + (zshuffle(get_vr<u8[16]>(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) >> 3)) & 0xf;
+			set_vr(op.rt, pshufb(as, sh));
+			return;
+		}
+
 		const auto sc = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
 		const auto sh = (sc - (zshuffle(get_vr<u8[16]>(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) >> 3)) & 0xf;
-		set_vr(op.rt, pshufb(get_vr<u8[16]>(op.ra), sh));
+		set_vr(op.rt, pshufb(a, sh));
 	}
 
 	void ROTQMBYBI(spu_opcode_t op)
@@ -6557,6 +6575,16 @@ public:
 			return;
 		}
 
+		// Data with swapped endian from a load instruction
+		if (auto [ok, v0] = match_expr(a, byteswap(match<u8[16]>())); ok)
+		{
+			const auto as = byteswap(a);
+			const auto sc = build<u8[16]>(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+			const auto sh = eval((sc + zshuffle(b, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12)) & 0xf);
+			set_vr(op.rt, pshufb(as, sh));
+			return;
+		}
+
 		const auto sc = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
 		const auto sh = eval((sc - zshuffle(b, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12)) & 0xf);
 		set_vr(op.rt, pshufb(a, sh));
@@ -7143,12 +7171,6 @@ public:
 		set_vr(op.rt4, (get_vr(op.rb) & c) | (get_vr(op.ra) & ~c));
 	}
 
-	template <typename TA>
-	static auto byteswap(TA&& a)
-	{
-		return zshuffle(std::forward<TA>(a), 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-	}
-
 	void SHUFB(spu_opcode_t op) //
 	{
 		if (match_vr<u8[16], u16[8], u32[4], u64[2]>(op.rc, [&](auto c, auto MP)
@@ -7273,7 +7295,18 @@ public:
 			{
 				v128 data = get_const_vector(ci, m_pos, 7000);
 
-				if (data == v128{})
+				bool all_bytes_equiv = true;
+
+				for (u32 i = 0; i < 16; i++)
+				{
+					if (data._u8[0] != data._u8[i])
+					{
+						all_bytes_equiv = false;
+						break;
+					}
+				}
+
+				if (all_bytes_equiv)
 				{
 					// See above
 					const auto x = avg(noncast<u8[16]>(sext<s8[16]>((c & 0xc0) == 0xc0)), noncast<u8[16]>(sext<s8[16]>((c & 0xe0) == 0xc0)));
@@ -7291,7 +7324,18 @@ public:
 			{
 				v128 data = get_const_vector(ci, m_pos, 7000);
 
-				if (data == v128{})
+				bool all_bytes_equiv = true;
+
+				for (u32 i = 0; i < 16; i++)
+				{
+					if (data._u8[0] != data._u8[i])
+					{
+						all_bytes_equiv = false;
+						break;
+					}
+				}
+
+				if (all_bytes_equiv)
 				{
 					// See above
 					const auto x = avg(noncast<u8[16]>(sext<s8[16]>((c & 0xc0) == 0xc0)), noncast<u8[16]>(sext<s8[16]>((c & 0xe0) == 0xc0)));