PPU LLVM: rewrite some packing instructions

Rewrote VPKSHSS, VPKSHUS, VPKSWSS, and VPKSWUS.
Decoupled the saturation test from the saturating-pack pattern.
This commit is contained in:
Nekotekina 2021-06-17 18:24:21 +03:00
parent abe498f35c
commit e7c827f73b

View file

@ -1332,38 +1332,42 @@ void PPUTranslator::VPKPX(ppu_opcode_t op)
void PPUTranslator::VPKSHSS(ppu_opcode_t op) void PPUTranslator::VPKSHSS(ppu_opcode_t op)
{ {
const auto ab = GetVrs(VrType::vi16, op.va, op.vb); // Caution: potentially out-of-lane algorithm
const auto src = Shuffle(ab[0], ab[1], { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }); const auto [a, b] = get_vrs<s16[8]>(op.va, op.vb);
const auto saturated = SaturateSigned(src, -0x80, 0x7f); const auto ab = shuffle2(b, a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
SetVr(op.vd, saturated.first); const auto r = trunc<u8[16]>(min(max(ab, splat<s16[16]>(-0x80)), splat<s16[16]>(0x7f)));
SetSat(IsNotZero(saturated.second)); set_vr(op.vd, r);
SetSat(IsNotZero(eval(((a + 0x80) | (b + 0x80)) >> 8).value));
} }
void PPUTranslator::VPKSHUS(ppu_opcode_t op) void PPUTranslator::VPKSHUS(ppu_opcode_t op)
{ {
const auto ab = GetVrs(VrType::vi16, op.va, op.vb); // Caution: potentially out-of-lane algorithm
const auto src = Shuffle(ab[0], ab[1], { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }); const auto [a, b] = get_vrs<s16[8]>(op.va, op.vb);
const auto saturated = SaturateSigned(src, 0, 0xff); const auto ab = shuffle2(b, a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
SetVr(op.vd, saturated.first); const auto r = trunc<u8[16]>(min(max(ab, splat<s16[16]>(0)), splat<s16[16]>(0xff)));
SetSat(IsNotZero(saturated.second)); set_vr(op.vd, r);
SetSat(IsNotZero(eval((a | b) >> 8).value));
} }
void PPUTranslator::VPKSWSS(ppu_opcode_t op) void PPUTranslator::VPKSWSS(ppu_opcode_t op)
{ {
const auto ab = GetVrs(VrType::vi32, op.va, op.vb); // Caution: potentially out-of-lane algorithm
const auto src = Shuffle(ab[0], ab[1], { 0, 1, 2, 3, 4, 5, 6, 7 }); const auto [a, b] = get_vrs<s32[4]>(op.va, op.vb);
const auto saturated = SaturateSigned(src, -0x8000, 0x7fff); const auto ab = shuffle2(b, a, 0, 1, 2, 3, 4, 5, 6, 7);
SetVr(op.vd, saturated.first); const auto r = trunc<u16[8]>(min(max(ab, splat<s32[8]>(-0x8000)), splat<s32[8]>(0x7fff)));
SetSat(IsNotZero(saturated.second)); set_vr(op.vd, r);
SetSat(IsNotZero(eval(((a + 0x8000) | (b + 0x8000)) >> 16).value));
} }
void PPUTranslator::VPKSWUS(ppu_opcode_t op) void PPUTranslator::VPKSWUS(ppu_opcode_t op)
{ {
const auto ab = GetVrs(VrType::vi32, op.va, op.vb); // Caution: potentially out-of-lane algorithm
const auto src = Shuffle(ab[0], ab[1], { 0, 1, 2, 3, 4, 5, 6, 7 }); const auto [a, b] = get_vrs<s32[4]>(op.va, op.vb);
const auto saturated = SaturateSigned(src, 0, 0xffff); const auto ab = shuffle2(b, a, 0, 1, 2, 3, 4, 5, 6, 7);
SetVr(op.vd, saturated.first); const auto r = trunc<u16[8]>(min(max(ab, splat<s32[8]>(0)), splat<s32[8]>(0xffff)));
SetSat(IsNotZero(saturated.second)); set_vr(op.vd, r);
SetSat(IsNotZero(eval((a | b) >> 16).value));
} }
void PPUTranslator::VPKUHUM(ppu_opcode_t op) void PPUTranslator::VPKUHUM(ppu_opcode_t op)