From 3e0b45719da886b10f9dc380980225fdbcae09f1 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Wed, 24 Apr 2019 16:05:29 +0300 Subject: [PATCH] LLVM DSL: rewrite zshuffle, shuffle2, build Add llvm_const_vector template. --- rpcs3/Emu/CPU/CPUTranslator.h | 111 ++++++++++++++++++--------- rpcs3/Emu/Cell/SPURecompiler.cpp | 125 +++++++++++++++---------------- 2 files changed, 136 insertions(+), 100 deletions(-) diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index 635cf66575..4a9632484f 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -395,6 +395,23 @@ struct llvm_const_float } }; +template +struct llvm_const_vector +{ + using type = T; + + T data; + + static constexpr bool is_ok = N && llvm_value_t::is_vector == N; + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + static_assert(N && llvm_value_t::is_vector == N, "llvm_const_vector<>: invalid type"); + + return llvm::ConstantDataVector::get(ir->getContext(), data); + } +}; + template > struct llvm_add { @@ -1497,6 +1514,48 @@ struct llvm_splat } }; +template > +struct llvm_zshuffle +{ + using type = std::remove_extent_t[N]; + + llvm_expr_t a1; + u32 index_array[N]; + + static_assert(llvm_value_t::is_vector, "llvm_zshuffle<>: invalid type"); + + static constexpr bool is_ok = llvm_value_t::is_vector && 1; + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + + return ir->CreateShuffleVector(v1, llvm::ConstantAggregateZero::get(v1->getType()), index_array); + } +}; + +template > +struct llvm_shuffle2 +{ + using type = std::remove_extent_t[N]; + + llvm_expr_t a1; + llvm_expr_t a2; + u32 index_array[N]; + + static_assert(llvm_value_t::is_vector, "llvm_shuffle2<>: invalid type"); + + static constexpr bool is_ok = llvm_value_t::is_vector && 1; + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + const auto v2 = a2.eval(ir); + + return ir->CreateShuffleVector(v1, v2, index_array); + } +}; + class cpu_translator { protected: @@ -1693,6 +1752,24 @@ public: return llvm_splat{std::forward(v)}; } + template ::is_ok>> + static auto build(Args... args) + { + return llvm_const_vector{static_cast>(args)...}; + } + + template ::is_ok>> + static auto zshuffle(T&& v, Args... indices) + { + return llvm_zshuffle{std::forward(v), {static_cast(indices)...}}; + } + + template ::is_ok>> + static auto shuffle2(T&& v1, U&& v2, Args... indices) + { + return llvm_shuffle2{std::forward(v1), std::forward(v2), {static_cast(indices)...}}; + } + // Average: (a + b + 1) >> 1 template inline auto avg(T a, T b) @@ -1714,40 +1791,6 @@ public: return result; } - // Shuffle single vector using all zeros second vector of the same size - template - auto zshuffle(T1 a, Args... args) - { - static_assert(sizeof(T) / sizeof(std::remove_extent_t) == sizeof...(Args), "zshuffle: unexpected result type"); - const u32 values[]{static_cast(args)...}; - value_t result; - result.value = a.eval(m_ir); - result.value = m_ir->CreateShuffleVector(result.value, llvm::ConstantInt::get(result.value->getType(), 0), values); - return result; - } - - template - auto shuffle2(T1 a, T2 b, Args... args) - { - static_assert(sizeof(T) / sizeof(std::remove_extent_t) == sizeof...(Args), "shuffle2: unexpected result type"); - const u32 values[]{static_cast(args)...}; - value_t result; - result.value = a.eval(m_ir); - result.value = m_ir->CreateShuffleVector(result.value, b.eval(m_ir), values); - return result; - } - - template - auto build(Args... args) - { - using value_type = std::remove_extent_t; - const value_type values[]{static_cast(args)...}; - static_assert(sizeof(T) / sizeof(value_type) == sizeof...(Args), "build: unexpected number of arguments"); - value_t result; - result.value = llvm::ConstantDataVector::get(m_context, values); - return result; - } - template llvm::Function* get_intrinsic(llvm::Intrinsic::ID id) { diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 0b52a522b4..e4a48926f2 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -5027,22 +5027,22 @@ public: void ROTQBYBI(spu_opcode_t op) { - auto sh = build(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - sh = eval((sh - (zshuffle(get_vr(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) >> 3)) & 0xf); + const auto sc = build(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const auto sh = (sc - (zshuffle(get_vr(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) >> 3)) & 0xf; set_vr(op.rt, pshufb(get_vr(op.ra), sh)); } void ROTQMBYBI(spu_opcode_t op) { - auto sh = build(112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); - sh = eval(sh + (-(zshuffle(get_vr(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) >> 3) & 0x1f)); + const auto sc = build(112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); + const auto sh = sc + (-(zshuffle(get_vr(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) >> 3) & 0x1f); set_vr(op.rt, pshufb(get_vr(op.ra), sh)); } void SHLQBYBI(spu_opcode_t op) { - auto sh = build(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - sh = eval(sh - (zshuffle(get_vr(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) >> 3)); + const auto sc = build(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const auto sh = sc - (zshuffle(get_vr(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) >> 3); set_vr(op.rt, pshufb(get_vr(op.ra), sh)); } @@ -5081,30 +5081,30 @@ public: void ROTQBI(spu_opcode_t op) { const auto a = get_vr(op.ra); - const auto b = zshuffle(get_vr(op.rb) & 0x7, 3, 3, 3, 3); - set_vr(op.rt, fshl(a, zshuffle(a, 3, 0, 1, 2), b)); + const auto b = zshuffle(get_vr(op.rb) & 0x7, 3, 3, 3, 3); + set_vr(op.rt, fshl(a, zshuffle(a, 3, 0, 1, 2), b)); } void ROTQMBI(spu_opcode_t op) { const auto a = get_vr(op.ra); - const auto b = zshuffle(-get_vr(op.rb) & 0x7, 3, 3, 3, 3); - set_vr(op.rt, fshr(zshuffle(a, 1, 2, 3, 4), a, b)); + const auto b = zshuffle(-get_vr(op.rb) & 0x7, 3, 3, 3, 3); + set_vr(op.rt, fshr(zshuffle(a, 1, 2, 3, 4), a, b)); } void SHLQBI(spu_opcode_t op) { const auto a = get_vr(op.ra); - const auto b = zshuffle(get_vr(op.rb) & 0x7, 3, 3, 3, 3); - set_vr(op.rt, fshl(a, zshuffle(a, 4, 0, 1, 2), b)); + const auto b = zshuffle(get_vr(op.rb) & 0x7, 3, 3, 3, 3); + set_vr(op.rt, fshl(a, zshuffle(a, 4, 0, 1, 2), b)); } void ROTQBY(spu_opcode_t op) { const auto a = get_vr(op.ra); const auto b = get_vr(op.rb); - auto sh = build(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - sh = eval((sh - zshuffle(b, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12)) & 0xf); + const auto sc = build(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const auto sh = eval((sc - zshuffle(b, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12)) & 0xf); set_vr(op.rt, pshufb(a, sh)); } @@ -5112,8 +5112,8 @@ public: { const auto a = get_vr(op.ra); const auto b = get_vr(op.rb); - auto sh = build(112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); - sh = eval(sh + (-zshuffle(b, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) & 0x1f)); + const auto sc = build(112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); + const auto sh = sc + (-zshuffle(b, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) & 0x1f); set_vr(op.rt, pshufb(a, sh)); } @@ -5121,17 +5121,17 @@ public: { const auto a = get_vr(op.ra); const auto b = get_vr(op.rb); - auto sh = build(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - sh = eval(sh - (zshuffle(b, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) & 0x1f)); + const auto sc = build(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const auto sh = sc - (zshuffle(b, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) & 0x1f); set_vr(op.rt, pshufb(a, sh)); } void ORX(spu_opcode_t op) { const auto a = get_vr(op.ra); - const auto x = zshuffle(a, 2, 3, 0, 1) | a; - const auto y = zshuffle(x, 1, 0, 3, 2) | x; - set_vr(op.rt, zshuffle(y, 4, 4, 4, 3)); + const auto x = zshuffle(a, 2, 3, 0, 1) | a; + const auto y = zshuffle(x, 1, 0, 3, 2) | x; + set_vr(op.rt, zshuffle(y, 4, 4, 4, 3)); } void CBD(spu_opcode_t op) @@ -5170,44 +5170,44 @@ public: { const auto a = get_vr(op.ra); const auto b = eval(get_imm(op.i7, false) & 0x7); - set_vr(op.rt, fshl(a, zshuffle(a, 3, 0, 1, 2), b)); + set_vr(op.rt, fshl(a, zshuffle(a, 3, 0, 1, 2), b)); } void ROTQMBII(spu_opcode_t op) { const auto a = get_vr(op.ra); const auto b = eval(-get_imm(op.i7, false) & 0x7); - set_vr(op.rt, fshr(zshuffle(a, 1, 2, 3, 4), a, b)); + set_vr(op.rt, fshr(zshuffle(a, 1, 2, 3, 4), a, b)); } void SHLQBII(spu_opcode_t op) { const auto a = get_vr(op.ra); const auto b = eval(get_imm(op.i7, false) & 0x7); - set_vr(op.rt, fshl(a, zshuffle(a, 4, 0, 1, 2), b)); + set_vr(op.rt, fshl(a, zshuffle(a, 4, 0, 1, 2), b)); } void ROTQBYI(spu_opcode_t op) { const auto a = get_vr(op.ra); - auto sh = build(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - sh = eval((sh - get_imm(op.i7, false)) & 0xf); + const auto sc = build(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const auto sh = (sc - get_imm(op.i7, false)) & 0xf; set_vr(op.rt, pshufb(a, sh)); } void ROTQMBYI(spu_opcode_t op) { const auto a = get_vr(op.ra); - auto sh = build(112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); - sh = eval(sh + (-get_imm(op.i7, false) & 0x1f)); + const auto sc = build(112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); + const auto sh = sc + (-get_imm(op.i7, false) & 0x1f); set_vr(op.rt, pshufb(a, sh)); } void SHLQBYI(spu_opcode_t op) { const auto a = get_vr(op.ra); - auto sh = build(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - sh = eval(sh - (get_imm(op.i7, false) & 0x1f)); + const auto sc = build(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const auto sh = sc - (get_imm(op.i7, false) & 0x1f); set_vr(op.rt, pshufb(a, sh)); } @@ -5242,8 +5242,8 @@ public: const auto b = get_vr(op.rb); const auto ahs = eval((a >> 8) + (a & 0xff)); const auto bhs = eval((b >> 8) + (b & 0xff)); - const auto lsh = shuffle2(ahs, bhs, 0, 9, 2, 11, 4, 13, 6, 15); - const auto hsh = shuffle2(ahs, bhs, 1, 8, 3, 10, 5, 12, 7, 14); + const auto lsh = shuffle2(ahs, bhs, 0, 9, 2, 11, 4, 13, 6, 15); + const auto hsh = shuffle2(ahs, bhs, 1, 8, 3, 10, 5, 12, 7, 14); set_vr(op.rt, lsh + hsh); } @@ -5952,7 +5952,7 @@ public: { if (g_cfg.core.spu_accurate_xfloat) { - const auto r = shuffle2(get_vr(op.ra), fsplat(0.), 1, 3); + const auto r = shuffle2(get_vr(op.ra), fsplat(0.), 1, 3); const auto d = bitcast(r); const auto a = eval(d & 0x7fffffffffffffff); const auto s = eval(d & 0x8000000000000000); @@ -5963,7 +5963,7 @@ public: else { value_t r; - r.value = m_ir->CreateFPExt(shuffle2(get_vr(op.ra), fsplat(0.), 1, 3).value, get_type()); + r.value = m_ir->CreateFPExt(shuffle2(get_vr(op.ra), fsplat(0.), 1, 3).eval(m_ir), get_type()); set_vr(op.rt, r); } } @@ -5979,13 +5979,13 @@ public: const auto i = select(a > 0x47f0000000000000, eval(s | 0x47f0000000000000), d); const auto n = select(a > 0x7ff0000000000000, splat(0x47f8000000000000), i); const auto z = select(a < 0x3810000000000000, s, n); - set_vr(op.rt, shuffle2(bitcast(z), fsplat(0.), 2, 0, 3, 1), false); + set_vr(op.rt, shuffle2(bitcast(z), fsplat(0.), 2, 0, 3, 1), false); } else { value_t r; r.value = m_ir->CreateFPTrunc(get_vr(op.ra).value, get_type()); - set_vr(op.rt, shuffle2(r, fsplat(0.), 2, 0, 3, 1)); + set_vr(op.rt, shuffle2(r, fsplat(0.), 2, 0, 3, 1)); } } @@ -6214,7 +6214,7 @@ public: if (auto ca = llvm::dyn_cast(a.value)) { v128 data = get_const_vector(ca, m_pos, 25971); - r = build(data._s32[0], data._s32[1], data._s32[2], data._s32[3]); + r.value = build(data._s32[0], data._s32[1], data._s32[2], data._s32[3]).eval(m_ir); } else { @@ -6255,7 +6255,7 @@ public: if (auto ca = llvm::dyn_cast(a.value)) { v128 data = get_const_vector(ca, m_pos, 20971); - r = build(data._u32[0], data._u32[1], data._u32[2], data._u32[3]); + r.value = build(data._u32[0], data._u32[1], data._u32[2], data._u32[3]).eval(m_ir); } else { @@ -6286,38 +6286,41 @@ public: } } + void make_store_ls(value_t addr, value_t data) + { + const auto bswapped = zshuffle(data, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + m_ir->CreateStore(bswapped.eval(m_ir), m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type())); + } + + auto make_load_ls(value_t addr) + { + value_t data; + data.value = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type())); + return zshuffle(data, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + } + void STQX(spu_opcode_t op) { value_t addr = eval(zext((extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)) & 0x3fff0)); - value_t r = get_vr(op.rt); - r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); - m_ir->CreateStore(r.value, m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type())); + make_store_ls(addr, get_vr(op.rt)); } void LQX(spu_opcode_t op) { value_t addr = eval(zext((extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)) & 0x3fff0)); - value_t r; - r.value = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type())); - r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); - set_vr(op.rt, r); + set_vr(op.rt, make_load_ls(addr)); } void STQA(spu_opcode_t op) { value_t addr = eval((get_imm(op.i16, false) << 2) & 0x3fff0); - value_t r = get_vr(op.rt); - r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); - m_ir->CreateStore(r.value, m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type())); + make_store_ls(addr, get_vr(op.rt)); } void LQA(spu_opcode_t op) { value_t addr = eval((get_imm(op.i16, false) << 2) & 0x3fff0); - value_t r; - r.value = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type())); - r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); - set_vr(op.rt, r); + set_vr(op.rt, make_load_ls(addr)); } void STQR(spu_opcode_t op) // @@ -6325,9 +6328,7 @@ public: value_t addr; addr.value = m_interp_magn ? m_ir->CreateZExt(m_interp_pc, get_type()) : m_ir->getInt64(m_pos); addr = eval(((get_imm(op.i16, false) << 2) + addr) & 0x3fff0); - value_t r = get_vr(op.rt); - r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); - m_ir->CreateStore(r.value, m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type())); + make_store_ls(addr, get_vr(op.rt)); } void LQR(spu_opcode_t op) // @@ -6335,27 +6336,19 @@ public: value_t addr; addr.value = m_interp_magn ? m_ir->CreateZExt(m_interp_pc, get_type()) : m_ir->getInt64(m_pos); addr = eval(((get_imm(op.i16, false) << 2) + addr) & 0x3fff0); - value_t r; - r.value = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type())); - r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); - set_vr(op.rt, r); + set_vr(op.rt, make_load_ls(addr)); } void STQD(spu_opcode_t op) { value_t addr = eval(zext((extract(get_vr(op.ra), 3) + (get_imm(op.si10) << 4)) & 0x3fff0)); - value_t r = get_vr(op.rt); - r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); - m_ir->CreateStore(r.value, m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type())); + make_store_ls(addr, get_vr(op.rt)); } void LQD(spu_opcode_t op) { value_t addr = eval(zext((extract(get_vr(op.ra), 3) + (get_imm(op.si10) << 4)) & 0x3fff0)); - value_t r; - r.value = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type())); - r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); - set_vr(op.rt, r); + set_vr(op.rt, make_load_ls(addr)); } void make_halt(value_t cond)