diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index 10961d84b9..ee644ce0c4 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -3250,6 +3250,84 @@ public: }); } + // Infinite-precision shift left: builds select(b < esz, a << b, splat(0)), so a count b below esz (read from llvm_value_t esize; presumably the element bit width, TODO confirm) shifts normally and any other count yields zero. The lambda is the matcher: it recognises this same select pattern, requires both uses of b to be equal, and returns the concatenated sub-match results for a and b, or sets value to nullptr and returns an empty tuple. + template > + static auto inf_shl(T&& a, U&& b) + { + static constexpr u32 esz = llvm_value_t::esize; + + return expr(select(b < esz, a << b, splat(0)), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple + { + static const auto M = match(); + + if (auto [ok, b, a, b2] = match_expr(value, _m, select(M < esz, M << M, splat(0))); ok && b.eq(b2)) + { + if (auto r1 = llvm_expr_t{}.match(a.value, _m); a.eq()) + { + if (auto r2 = llvm_expr_t{}.match(b.value, _m); b.eq()) + { + return std::tuple_cat(r1, r2); + } + } + } + + value = nullptr; + return {}; + }); + } + + // Infinite-precision logical shift right (unsigned): builds select(b < esz, a >> b, splat(0)), so a count b below esz (read from llvm_value_t esize) shifts normally and any other count yields zero. The lambda is the matcher: it recognises this same select pattern, requires both uses of b to be equal, and returns the concatenated sub-match results for a and b, or sets value to nullptr and returns an empty tuple. + template > + static auto inf_lshr(T&& a, U&& b) + { + static constexpr u32 esz = llvm_value_t::esize; + + return expr(select(b < esz, a >> b, splat(0)), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple + { + static const auto M = match(); + + if (auto [ok, b, a, b2] = match_expr(value, _m, select(M < esz, M >> M, splat(0))); ok && b.eq(b2)) + { + if (auto r1 = llvm_expr_t{}.match(a.value, _m); a.eq()) + { + if (auto r2 = llvm_expr_t{}.match(b.value, _m); b.eq()) + { + return std::tuple_cat(r1, r2); + } + } + } + + value = nullptr; + return {}; + }); + } + + // Infinite-precision arithmetic shift right (signed): builds a >> select(b > esz-1, splat(esz-1), b), so a count above esz-1 is clamped to esz-1 rather than zeroing the result (esz is read from llvm_value_t esize). The lambda is the matcher: it recognises this same shift-of-select pattern, requires both uses of b to be equal, and returns the concatenated sub-match results for a and b, or sets value to nullptr and returns an empty tuple. + template > + static auto inf_ashr(T&& a, U&& b) + { + static constexpr u32 esz = llvm_value_t::esize; + + return expr(a >> select(b > (esz - 1), splat(esz - 1), b), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple + { + static const auto M = match(); + + if (auto [ok, a, b, b2] = match_expr(value, _m, M >> select(M > (esz - 1), splat(esz - 1), M)); ok && b.eq(b2)) + { + if (auto r1 = llvm_expr_t{}.match(a.value, _m); a.eq()) + { + if (auto r2 = 
llvm_expr_t{}.match(b.value, _m); b.eq()) + { + return std::tuple_cat(r1, r2); + } + } + } + + value = nullptr; + return {}; + }); + } + template llvm::Function* get_intrinsic(llvm::Intrinsic::ID id) { diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 19c5543f69..0721528eae 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -6420,128 +6420,108 @@ public: set_vr(op.rt, absd(a, b)); } - template - void make_spu_rol(spu_opcode_t op, value_t by) - { - set_vr(op.rt, rol(get_vr(op.ra), by)); - } - - template - void make_spu_rotate_mask(spu_opcode_t op, value_t by) - { - value_t sh; - static_assert(sh.esize == by.esize); - sh.value = m_ir->CreateAnd(m_ir->CreateNeg(by.value), by.esize * 2 - 1); - if constexpr (!by.is_vector) - sh.value = m_ir->CreateVectorSplat(sh.is_vector, sh.value); - - set_vr(op.rt, select(sh < by.esize, get_vr(op.ra) >> sh, splat(0))); - } - - template - void make_spu_rotate_sext(spu_opcode_t op, value_t by) - { - value_t sh; - static_assert(sh.esize == by.esize); - sh.value = m_ir->CreateAnd(m_ir->CreateNeg(by.value), by.esize * 2 - 1); - if constexpr (!by.is_vector) - sh.value = m_ir->CreateVectorSplat(sh.is_vector, sh.value); - - value_t max_sh = eval(splat(by.esize - 1)); - sh.value = m_ir->CreateSelect(m_ir->CreateICmpUGT(max_sh.value, sh.value), sh.value, max_sh.value); - set_vr(op.rt, get_vr(op.ra) >> sh); - } - - template - void make_spu_shift_left(spu_opcode_t op, value_t by) - { - value_t sh; - static_assert(sh.esize == by.esize); - sh.value = m_ir->CreateAnd(by.value, by.esize * 2 - 1); - if constexpr (!by.is_vector) - sh.value = m_ir->CreateVectorSplat(sh.is_vector, sh.value); - - set_vr(op.rt, select(sh < by.esize, get_vr(op.ra) << sh, splat(0))); - } - void ROT(spu_opcode_t op) { - make_spu_rol(op, get_vr(op.rb)); + const auto [a, b] = get_vrs(op.ra, op.rb); + set_vr(op.rt, rol(a, b)); } void ROTM(spu_opcode_t op) { - make_spu_rotate_mask(op, get_vr(op.rb)); + 
const auto [a, b] = get_vrs(op.ra, op.rb); + set_vr(op.rt, inf_lshr(a, -b & 63)); } void ROTMA(spu_opcode_t op) { - make_spu_rotate_sext(op, get_vr(op.rb)); + const auto [a, b] = get_vrs(op.ra, op.rb); + set_vr(op.rt, inf_ashr(a, -b & 63)); } void SHL(spu_opcode_t op) { - make_spu_shift_left(op, get_vr(op.rb)); + const auto [a, b] = get_vrs(op.ra, op.rb); + set_vr(op.rt, inf_shl(a, b & 63)); } void ROTH(spu_opcode_t op) { - make_spu_rol(op, get_vr(op.rb)); + const auto [a, b] = get_vrs(op.ra, op.rb); + set_vr(op.rt, rol(a, b)); } void ROTHM(spu_opcode_t op) { - make_spu_rotate_mask(op, get_vr(op.rb)); + const auto [a, b] = get_vrs(op.ra, op.rb); + set_vr(op.rt, inf_lshr(a, -b & 31)); } void ROTMAH(spu_opcode_t op) { - make_spu_rotate_sext(op, get_vr(op.rb)); + const auto [a, b] = get_vrs(op.ra, op.rb); + set_vr(op.rt, inf_ashr(a, -b & 31)); } void SHLH(spu_opcode_t op) { - make_spu_shift_left(op, get_vr(op.rb)); + const auto [a, b] = get_vrs(op.ra, op.rb); + set_vr(op.rt, inf_shl(a, b & 31)); } void ROTI(spu_opcode_t op) { - make_spu_rol(op, get_imm(op.i7, false)); + const auto a = get_vr(op.ra); + const auto i = get_imm(op.i7, false); + set_vr(op.rt, rol(a, i)); } void ROTMI(spu_opcode_t op) { - make_spu_rotate_mask(op, get_imm(op.i7, false)); + const auto a = get_vr(op.ra); + const auto i = get_imm(op.i7, false); + set_vr(op.rt, inf_lshr(a, -i & 63)); } void ROTMAI(spu_opcode_t op) { - make_spu_rotate_sext(op, get_imm(op.i7, false)); + const auto a = get_vr(op.ra); + const auto i = get_imm(op.i7, false); + set_vr(op.rt, inf_ashr(a, -i & 63)); } void SHLI(spu_opcode_t op) { - make_spu_shift_left(op, get_imm(op.i7, false)); + const auto a = get_vr(op.ra); + const auto i = get_imm(op.i7, false); + set_vr(op.rt, inf_shl(a, i & 63)); } void ROTHI(spu_opcode_t op) { - make_spu_rol(op, get_imm(op.i7, false)); + const auto a = get_vr(op.ra); + const auto i = get_imm(op.i7, false); + set_vr(op.rt, rol(a, i)); } void ROTHMI(spu_opcode_t op) { - make_spu_rotate_mask(op, 
get_imm(op.i7, false)); + const auto a = get_vr(op.ra); + const auto i = get_imm(op.i7, false); + set_vr(op.rt, inf_lshr(a, -i & 31)); } void ROTMAHI(spu_opcode_t op) { - make_spu_rotate_sext(op, get_imm(op.i7, false)); + const auto a = get_vr(op.ra); + const auto i = get_imm(op.i7, false); + set_vr(op.rt, inf_ashr(a, -i & 31)); } void SHLHI(spu_opcode_t op) { - make_spu_shift_left(op, get_imm(op.i7, false)); + const auto a = get_vr(op.ra); + const auto i = get_imm(op.i7, false); + set_vr(op.rt, inf_shl(a, i & 31)); } void A(spu_opcode_t op)