diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index d622fcdf21..848eda53f8 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -386,6 +386,12 @@ struct llvm_match_t return value != nullptr; } + template + bool eq(const Args&... args) const + { + return value && ((value == args.value) && ...); + } + llvm::Value* eval(llvm::IRBuilder<>* ir) const { return value; @@ -405,6 +411,8 @@ struct llvm_match_t template >> struct llvm_placeholder_t { + // TODO: placeholder extracting actual constant values (u64, f64, vector, etc) + using type = T; llvm::Value* eval(llvm::IRBuilder<>* ir) const @@ -416,7 +424,7 @@ struct llvm_placeholder_t { if (value && value->getType() == llvm_value_t::get_type(value->getContext())) { - return {value}; + return {{value}}; } value = nullptr; @@ -1339,7 +1347,7 @@ struct llvm_cmp llvm::Value* v1 = {}; llvm::Value* v2 = {}; - if (auto i = llvm::dyn_cast_or_null(value); i && i->getOpcode() == pred) + if (auto i = llvm::dyn_cast_or_null(value); i && i->getPredicate() == pred) { v1 = i->getOperand(0); v2 = i->getOperand(1); @@ -1399,7 +1407,7 @@ struct llvm_ord llvm::Value* v1 = {}; llvm::Value* v2 = {}; - if (auto i = llvm::dyn_cast_or_null(value); i && i->getOpcode() == pred) + if (auto i = llvm::dyn_cast_or_null(value); i && i->getPredicate() == pred) { v1 = i->getOperand(0); v2 = i->getOperand(1); @@ -1452,7 +1460,7 @@ struct llvm_uno llvm::Value* v1 = {}; llvm::Value* v2 = {}; - if (auto i = llvm::dyn_cast_or_null(value); i && i->getOpcode() == pred) + if (auto i = llvm::dyn_cast_or_null(value); i && i->getPredicate() == pred) { v1 = i->getOperand(0); v2 = i->getOperand(1); @@ -1591,6 +1599,7 @@ struct llvm_bitcast using type = U; llvm_expr_t a1; + llvm::Module* module; static constexpr uint bitsize0 = llvm_value_t::is_vector ? llvm_value_t::is_vector * llvm_value_t::esize : llvm_value_t::esize; static constexpr uint bitsize1 = llvm_value_t::is_vector ? llvm_value_t::is_vector * llvm_value_t::esize : llvm_value_t::esize; @@ -1598,8 +1607,6 @@ struct llvm_bitcast static_assert(bitsize0 == bitsize1, "llvm_bitcast<>: invalid type (size mismatch)"); static_assert(llvm_value_t::is_int || llvm_value_t::is_float, "llvm_bitcast<>: invalid type"); static_assert(llvm_value_t::is_int || llvm_value_t::is_float, "llvm_bitcast<>: invalid result type"); - static_assert(llvm_value_t::is_int != llvm_value_t::is_int || llvm_value_t::is_vector != llvm_value_t::is_vector, - "llvm_bitcast<>: no-op cast (use noncast)"); static constexpr bool is_ok = bitsize0 && bitsize0 == bitsize1 && @@ -1611,9 +1618,13 @@ struct llvm_bitcast const auto v1 = a1.eval(ir); const auto rt = llvm_value_t::get_type(ir->getContext()); + if constexpr (llvm_value_t::is_int == llvm_value_t::is_int && llvm_value_t::is_vector == llvm_value_t::is_vector) + { + return v1; + } + if (const auto c1 = llvm::dyn_cast(v1)) { - const auto module = ir->GetInsertBlock()->getParent()->getParent(); const auto result = llvm::ConstantFoldCastOperand(llvm::Instruction::BitCast, c1, rt, module->getDataLayout()); if (result) @@ -1627,6 +1638,19 @@ struct llvm_bitcast llvm_match_tuple match(llvm::Value*& value) const { + if constexpr (llvm_value_t::is_int == llvm_value_t::is_int && llvm_value_t::is_vector == llvm_value_t::is_vector) + { + if (value) + { + if (auto r1 = a1.match(value); value) + { + return r1; + } + } + + return {}; + } + llvm::Value* v1 = {}; if (auto i = llvm::dyn_cast_or_null(value); i && i->getOpcode() == llvm::Instruction::BitCast) @@ -1644,17 +1668,16 @@ struct llvm_bitcast if (auto c = llvm::dyn_cast_or_null(value)) { - // TODO - // const auto target = llvm_value_t::get_type(c->getContext()); + const auto target = llvm_value_t::get_type(c->getContext()); - // // Reverse bitcast on a constant - // if (llvm::Value* cv = llvm::ConstantFoldCastOperand(llvm::Instruction::BitCast, c, target, module->getDataLayout())) - // { - // if (auto r1 = a1.match(cv); cv) - // { - // return r1; - // } - // } + // Reverse bitcast on a constant + if (llvm::Value* cv = llvm::ConstantFoldCastOperand(llvm::Instruction::BitCast, c, target, module->getDataLayout())) + { + if (auto r1 = a1.match(cv); cv) + { + return r1; + } + } } value = nullptr; @@ -1859,24 +1882,40 @@ struct llvm_min static constexpr bool is_ok = llvm_value_t::is_sint || llvm_value_t::is_uint; + static constexpr auto pred = llvm_value_t::is_sint ? llvm::ICmpInst::ICMP_SLT : llvm::ICmpInst::ICMP_ULT; + llvm::Value* eval(llvm::IRBuilder<>* ir) const { const auto v1 = a1.eval(ir); const auto v2 = a2.eval(ir); - - if constexpr (llvm_value_t::is_sint) - { - return ir->CreateSelect(ir->CreateICmpSLT(v1, v2), v1, v2); - } - - if constexpr (llvm_value_t::is_uint) - { - return ir->CreateSelect(ir->CreateICmpULT(v1, v2), v1, v2); - } + return ir->CreateSelect(ir->CreateICmp(pred, v1, v2), v1, v2); } llvm_match_tuple match(llvm::Value*& value) const { + llvm::Value* v1 = {}; + llvm::Value* v2 = {}; + + if (auto i = llvm::dyn_cast_or_null(value)) + { + v1 = i->getOperand(1); + v2 = i->getOperand(2); + + if (auto j = llvm::dyn_cast(i->getOperand(0)); j && j->getPredicate() == pred) + { + if (v1 == j->getOperand(0) && v2 == j->getOperand(1)) + { + if (auto r1 = a1.match(v1); v1) + { + if (auto r2 = a2.match(v2); v2) + { + return std::tuple_cat(r1, r2); + } + } + } + } + } + value = nullptr; return {}; } @@ -1892,24 +1931,40 @@ struct llvm_max static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint, "llvm_max<>: invalid type"); + static constexpr auto pred = llvm_value_t::is_sint ? llvm::ICmpInst::ICMP_SLT : llvm::ICmpInst::ICMP_ULT; + llvm::Value* eval(llvm::IRBuilder<>* ir) const { const auto v1 = a1.eval(ir); const auto v2 = a2.eval(ir); - - if constexpr (llvm_value_t::is_sint) - { - return ir->CreateSelect(ir->CreateICmpSLT(v1, v2), v2, v1); - } - - if constexpr (llvm_value_t::is_uint) - { - return ir->CreateSelect(ir->CreateICmpULT(v1, v2), v2, v1); - } + return ir->CreateSelect(ir->CreateICmp(pred, v1, v2), v2, v1); } llvm_match_tuple match(llvm::Value*& value) const { + llvm::Value* v1 = {}; + llvm::Value* v2 = {}; + + if (auto i = llvm::dyn_cast_or_null(value)) + { + v1 = i->getOperand(2); + v2 = i->getOperand(1); + + if (auto j = llvm::dyn_cast(i->getOperand(0)); j && j->getPredicate() == pred) + { + if (v1 == j->getOperand(0) && v2 == j->getOperand(1)) + { + if (auto r1 = a1.match(v1); v1) + { + if (auto r2 = a2.match(v2); v2) + { + return std::tuple_cat(r1, r2); + } + } + } + } + } + value = nullptr; return {}; } @@ -2136,8 +2191,8 @@ struct llvm_insert if (auto i = llvm::dyn_cast_or_null(value)) { v1 = i->getOperand(0); - v2 = i->getOperand(1); - v3 = i->getOperand(2); + v2 = i->getOperand(2); + v3 = i->getOperand(1); if (auto r1 = a1.match(v1); v1) { @@ -2181,6 +2236,27 @@ struct llvm_splat llvm_match_tuple match(llvm::Value*& value) const { + llvm::Value* v1 = {}; + + if (auto i = llvm::dyn_cast_or_null(value)) + { + if (llvm::isa(i->getOperand(2))) + { + if (auto j = llvm::dyn_cast(i->getOperand(0))) + { + if (llvm::cast(j->getOperand(2))->isZero()) + { + v1 = j->getOperand(1); + + if (auto r1 = a1.match(v1); v1) + { + return r1; + } + } + } + } + } + value = nullptr; return {}; } @@ -2207,6 +2283,24 @@ struct llvm_zshuffle llvm_match_tuple match(llvm::Value*& value) const { + llvm::Value* v1 = {}; + + if (auto i = llvm::dyn_cast_or_null(value)) + { + v1 = i->getOperand(0); + + if (auto z = llvm::dyn_cast(i->getOperand(1)); z && z->getType() == v1->getType()) + { + if (llvm::ConstantDataVector::get(value->getContext(), index_array) == i->getOperand(2)) + { + if (auto r1 = a1.match(v1); v1) + { + return r1; + } + } + } + } + value = nullptr; return {}; } @@ -2235,6 +2329,29 @@ struct llvm_shuffle2 llvm_match_tuple match(llvm::Value*& value) const { + llvm::Value* v1 = {}; + llvm::Value* v2 = {}; + + if (auto i = llvm::dyn_cast_or_null(value)) + { + v1 = i->getOperand(0); + v2 = i->getOperand(1); + + if (v1->getType() == v2->getType() && v1->getType() == llvm_value_t::get_type(v1->getContext())) + { + if (llvm::ConstantDataVector::get(value->getContext(), index_array) == i->getOperand(2)) + { + if (auto r1 = a1.match(v1); v1) + { + if (auto r2 = a2.match(v2); v2) + { + return std::tuple_cat(r1, r2); + } + } + } + } + } + value = nullptr; return {}; } @@ -2304,6 +2421,27 @@ public: return result; } + template + static llvm_placeholder_t match() + { + return {}; + } + + template > + auto match_expr(T&& arg, U&& expr) -> decltype(std::tuple_cat(std::make_tuple(false), expr.match(std::declval()))) + { + auto v = arg.eval(m_ir); + auto r = expr.match(v); + return std::tuple_cat(std::make_tuple(v != nullptr), r); + } + + template + bool match_for(F&& pred) + { + // Execute pred(.) for each type until one of them returns true + return (pred(llvm_placeholder_t{}) || ...); + } + template >::value>> static auto fcmp_ord(T&& cmp_expr) { @@ -2323,9 +2461,9 @@ public: } template ::is_ok>> - static auto bitcast(T&& expr) + auto bitcast(T&& expr) { - return llvm_bitcast{std::forward(expr)}; + return llvm_bitcast{std::forward(expr), m_module}; } template ::is_ok>> diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index b7d2cd5e59..56a4f7c288 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -2478,27 +2478,6 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator { verify("double_to_xfloat" HERE), val, val->getType() == get_type(); - // Detect xfloat_to_double to avoid unnecessary ops and prevent zeroed denormals - if (auto _bitcast = llvm::dyn_cast(val)) - { - if (_bitcast->getOpcode() == llvm::Instruction::BitCast) - { - if (auto _select = llvm::dyn_cast(_bitcast->getOperand(0))) - { - if (auto _icmp = llvm::dyn_cast(_select->getOperand(0))) - { - if (auto _and = llvm::dyn_cast(_icmp->getOperand(0))) - { - if (auto _zext = llvm::dyn_cast(_and->getOperand(0))) - { - // TODO: check all details and return xfloat_to_double() arg - } - } - } - } - } - } - const auto d = double_as_uint64(val); const auto s = m_ir->CreateAnd(m_ir->CreateLShr(d, 32), 0x80000000); const auto m = m_ir->CreateXor(m_ir->CreateLShr(d, 29), 0x40000000); @@ -2680,6 +2659,12 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator return r; } + template + auto get_vr_as(U&&, const bf_t& index) + { + return get_vr::type>(index); + } + template std::tuple>...> get_vrs(const Args&... args) { @@ -2705,12 +2690,51 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator return r; } + template + auto match_vr_as(U&&, const bf_t& index) + { + return match_vr::type>(index); + } + + template + bool match_vr(const bf_t& index, F&& pred) + { + return ((match_vr(index) && pred(match_vr(index), match())) || ...); + } + template std::tuple>...> match_vrs(const Args&... args) { return {match_vr(args)...}; } + // Extract scalar value from the preferred slot + template + auto get_scalar(T&& value) + { + using v_type = typename llvm_expr_t::type; + using e_type = std::remove_extent_t; + + static_assert(sizeof(v_type) == 16 || std::is_same_v, "Unknown vector type"); + + if constexpr (sizeof(e_type) == 1) + { + return extract(std::forward(value), 12); + } + else if constexpr (sizeof(e_type) == 2) + { + return extract(std::forward(value), 6); + } + else if constexpr (sizeof(e_type) == 4 || sizeof(v_type) == 32) + { + return extract(std::forward(value), 3); + } + else + { + return extract(std::forward(value), 1); + } + } + void set_reg_fixed(u32 index, llvm::Value* value, bool fixup = true) { llvm::StoreInst* dummy{}; @@ -4987,15 +5011,21 @@ public: void AND(spu_opcode_t op) { - if (const auto [a, b] = match_vrs(op.ra, op.rb); a && b) + if (match_vr(op.ra, [&](auto a, auto MP1) { - set_vr(op.rt, a & b); - return; - } + if (auto b = match_vr_as(a, op.rb)) + { + set_vr(op.rt, a & b); + return true; + } - if (const auto [a, b] = match_vrs(op.ra, op.rb); a && b) + return match_vr(op.rb, [&](auto b, auto MP2) + { + set_vr(op.rt, a & get_vr_as(a, op.rb)); + return true; + }); + })) { - set_vr(op.rt, a & b); return; } @@ -5086,36 +5116,37 @@ public: set_vr(op.rt, pshufb(get_vr(op.ra), sh)); } + template + auto spu_get_insertion_shuffle_mask(T&& index) + { + const auto c = bitcast(build(0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10)); + using e_type = std::remove_extent_t; + const auto v = splat(static_cast(sizeof(e_type) == 8 ? 0x01020304050607ull : 0x010203ull)); + return insert(c, std::forward(index), v); + } + void CBX(spu_opcode_t op) { - const auto s = eval(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)); - const auto i = eval(~s & 0xf); - auto r = build(0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10); - set_vr(op.rt, insert(r, i, splat(0x03))); + const auto s = get_scalar(get_vr(op.ra)) + get_scalar(get_vr(op.rb)); + set_vr(op.rt, spu_get_insertion_shuffle_mask(~s & 0xf)); } void CHX(spu_opcode_t op) { - const auto s = eval(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)); - const auto i = eval(~s >> 1 & 0x7); - auto r = build(0x1e1f, 0x1c1d, 0x1a1b, 0x1819, 0x1617, 0x1415, 0x1213, 0x1011); - set_vr(op.rt, insert(r, i, splat(0x0203))); + const auto s = get_scalar(get_vr(op.ra)) + get_scalar(get_vr(op.rb)); + set_vr(op.rt, spu_get_insertion_shuffle_mask(~s >> 1 & 0x7)); } void CWX(spu_opcode_t op) { - const auto s = eval(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)); - const auto i = eval(~s >> 2 & 0x3); - auto r = build(0x1c1d1e1f, 0x18191a1b, 0x14151617, 0x10111213); - set_vr(op.rt, insert(r, i, splat(0x010203))); + const auto s = get_scalar(get_vr(op.ra)) + get_scalar(get_vr(op.rb)); + set_vr(op.rt, spu_get_insertion_shuffle_mask(~s >> 2 & 0x3)); } void CDX(spu_opcode_t op) { - const auto s = eval(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)); - const auto i = eval(~s >> 3 & 0x1); - auto r = build(0x18191a1b1c1d1e1f, 0x1011121314151617); - set_vr(op.rt, insert(r, i, splat(0x01020304050607))); + const auto s = get_scalar(get_vr(op.ra)) + get_scalar(get_vr(op.rb)); + set_vr(op.rt, spu_get_insertion_shuffle_mask(~s >> 3 & 0x1)); } void ROTQBI(spu_opcode_t op) @@ -5176,34 +5207,26 @@ public: void CBD(spu_opcode_t op) { - const auto a = eval(extract(get_vr(op.ra), 3) + get_imm(op.i7)); - const auto i = eval(~a & 0xf); - auto r = build(0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10); - set_vr(op.rt, insert(r, i, splat(0x03))); + const auto a = get_scalar(get_vr(op.ra)) + get_imm(op.i7); + set_vr(op.rt, spu_get_insertion_shuffle_mask(~a & 0xf)); } void CHD(spu_opcode_t op) { - const auto a = eval(extract(get_vr(op.ra), 3) + get_imm(op.i7)); - const auto i = eval(~a >> 1 & 0x7); - auto r = build(0x1e1f, 0x1c1d, 0x1a1b, 0x1819, 0x1617, 0x1415, 0x1213, 0x1011); - set_vr(op.rt, insert(r, i, splat(0x0203))); + const auto a = get_scalar(get_vr(op.ra)) + get_imm(op.i7); + set_vr(op.rt, spu_get_insertion_shuffle_mask(~a >> 1 & 0x7)); } void CWD(spu_opcode_t op) { - const auto a = eval(extract(get_vr(op.ra), 3) + get_imm(op.i7)); - const auto i = eval(~a >> 2 & 0x3); - auto r = build(0x1c1d1e1f, 0x18191a1b, 0x14151617, 0x10111213); - set_vr(op.rt, insert(r, i, splat(0x010203))); + const auto a = get_scalar(get_vr(op.ra)) + get_imm(op.i7); + set_vr(op.rt, spu_get_insertion_shuffle_mask(~a >> 2 & 0x3)); } void CDD(spu_opcode_t op) { - const auto a = eval(extract(get_vr(op.ra), 3) + get_imm(op.i7)); - const auto i = eval(~a >> 3 & 0x1); - auto r = build(0x18191a1b1c1d1e1f, 0x1011121314151617); - set_vr(op.rt, insert(r, i, splat(0x01020304050607))); + const auto a = get_scalar(get_vr(op.ra)) + get_imm(op.i7); + set_vr(op.rt, spu_get_insertion_shuffle_mask(~a >> 3 & 0x1)); } void ROTQBII(spu_opcode_t op) @@ -5656,48 +5679,21 @@ public: void SHUFB(spu_opcode_t op) // { - if (auto ii = llvm::dyn_cast_or_null(get_reg_raw(op.rc))) + if (match_vr(op.rc, [&](auto c, auto MP) { - // Detect if the mask comes from a CWD-like constant generation instruction - auto c0 = llvm::dyn_cast(ii->getOperand(0)); + using VT = typename decltype(MP)::type; - if (c0 && get_const_vector(c0, m_pos, op.rc) != v128::from64(0x18191a1b1c1d1e1f, 0x1011121314151617)) + // If the mask comes from a constant generation instruction, replace SHUFB with insert + if (auto [ok, i] = match_expr(c, spu_get_insertion_shuffle_mask(match())); ok) { - c0 = nullptr; + set_vr(op.rt4, insert(get_vr_as(c, op.rb), i, get_scalar(get_vr_as(c, op.ra)))); + return true; } - auto c1 = llvm::dyn_cast(ii->getOperand(1)); - - llvm::Type* vtype = nullptr; - llvm::Value* _new = nullptr; - - // Optimization: emit SHUFB as simple vector insert - if (c0 && c1 && c1->getType() == get_type() && c1->getZExtValue() == 0x01020304050607) - { - vtype = get_type(); - _new = extract(get_vr(op.ra), 1).eval(m_ir); - } - else if (c0 && c1 && c1->getType() == get_type() && c1->getZExtValue() == 0x010203) - { - vtype = get_type(); - _new = extract(get_vr(op.ra), 3).eval(m_ir); - } - else if (c0 && c1 && c1->getType() == get_type() && c1->getZExtValue() == 0x0203) - { - vtype = get_type(); - _new = extract(get_vr(op.ra), 6).eval(m_ir); - } - else if (c0 && c1 && c1->getType() == get_type() && c1->getZExtValue() == 0x03) - { - vtype = get_type(); - _new = extract(get_vr(op.ra), 12).eval(m_ir); - } - - if (vtype && _new) - { - set_reg_fixed(op.rt4, m_ir->CreateInsertElement(get_reg_fixed(op.rb, vtype), _new, ii->getOperand(2))); - return; - } + return false; + })) + { + return; } const auto c = get_vr(op.rc);