mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-10 17:01:24 +12:00
PPU: refactor VPack instructions
This commit is contained in:
parent
b404d37eb0
commit
248f9424ac
2 changed files with 162 additions and 105 deletions
|
@ -288,6 +288,7 @@ namespace asmjit
|
|||
if constexpr (arg_classify<B> == arg_class::reg_rv)
|
||||
{
|
||||
g_vc->vec_dealloc(vec_type{b.id()});
|
||||
//b = Operand();
|
||||
}
|
||||
}
|
||||
else if (utils::has_avx() && avx_op && (arg_classify<A> == arg_class::reg_lv || arg_classify<A> == arg_class::mem_lv))
|
||||
|
@ -317,6 +318,7 @@ namespace asmjit
|
|||
if constexpr (arg_classify<B> == arg_class::reg_rv)
|
||||
{
|
||||
g_vc->vec_dealloc(vec_type{b.id()});
|
||||
//b = Operand();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -334,6 +336,7 @@ namespace asmjit
|
|||
if constexpr (arg_classify<B> == arg_class::reg_rv)
|
||||
{
|
||||
g_vc->vec_dealloc(vec_type{b.id()});
|
||||
//b = Operand();
|
||||
}
|
||||
|
||||
if (arg_classify<A> == arg_class::mem_rv && a.isReg())
|
||||
|
@ -2020,6 +2023,90 @@ inline v128 gv_selectfs(const v128& _cmp, const v128& _true, const v128& _false)
|
|||
#endif
|
||||
}
|
||||
|
||||
// Narrow sixteen signed 16-bit lanes (eight from each operand) to signed 8-bit
// lanes using signed saturation. Lanes from `low` fill the lower half of the
// result, lanes from `high` fill the upper half.
inline v128 gv_packss_s16(const v128& low, const v128& high)
{
#if defined(ARCH_X64)
	const auto packed = _mm_packs_epi16(low, high);
	return packed;
#elif defined(ARCH_ARM64)
	const auto lower_half = vqmovn_s16(low);
	const auto upper_half = vqmovn_s16(high);
	return vcombine_s8(lower_half, upper_half);
#endif
}
|
||||
|
||||
// Narrow signed 16-bit lanes to unsigned 8-bit lanes using unsigned saturation
// (negative inputs become 0, inputs above 0xff become 0xff). Lanes from `low`
// fill the lower half of the result, lanes from `high` fill the upper half.
inline v128 gv_packus_s16(const v128& low, const v128& high)
{
#if defined(ARCH_X64)
	const auto packed = _mm_packus_epi16(low, high);
	return packed;
#elif defined(ARCH_ARM64)
	const auto lower_half = vqmovun_s16(low);
	const auto upper_half = vqmovun_s16(high);
	return vcombine_u8(lower_half, upper_half);
#endif
}
|
||||
|
||||
// Narrow unsigned 16-bit lanes to unsigned 8-bit lanes using unsigned
// saturation. Lanes from `low` fill the lower half of the result, lanes from
// `high` fill the upper half.
inline v128 gv_packus_u16(const v128& low, const v128& high)
{
#if defined(__SSE4_1__)
	// Clamp to 0xff as unsigned first: the pack instruction interprets its
	// inputs as signed, so values >= 0x8000 would otherwise be treated as negative.
	const auto limit = _mm_set1_epi16(0xff);
	const auto lower_clamped = _mm_min_epu16(low, limit);
	const auto upper_clamped = _mm_min_epu16(high, limit);
	return _mm_packus_epi16(lower_clamped, upper_clamped);
#elif defined(ARCH_X64)
	// No unsigned 16-bit min here: x - subs_epu16(x, 0xff) yields the same clamp.
	const auto limit = _mm_set1_epi16(0xff);
	const auto lower_excess = _mm_subs_epu16(low, limit);
	const auto upper_excess = _mm_subs_epu16(high, limit);
	const auto lower_clamped = _mm_sub_epi16(low, lower_excess);
	const auto upper_clamped = _mm_sub_epi16(high, upper_excess);
	return _mm_packus_epi16(lower_clamped, upper_clamped);
#elif defined(ARCH_ARM64)
	const auto lower_half = vqmovn_u16(low);
	const auto upper_half = vqmovn_u16(high);
	return vcombine_u8(lower_half, upper_half);
#endif
}
|
||||
|
||||
// Narrow 16-bit lanes to 8-bit lanes by truncation (keeps the low byte of each
// lane, no saturation). Lanes from `low` fill the lower half of the result,
// lanes from `high` fill the upper half.
inline v128 gv_packtu16(const v128& low, const v128& high)
{
#if defined(ARCH_X64)
	// Masking to the low byte first guarantees the saturating pack never clamps.
	const auto byte_mask = _mm_set1_epi16(0xff);
	return _mm_packus_epi16(low & byte_mask, high & byte_mask);
#elif defined(ARCH_ARM64)
	// Picks the even-indexed bytes of both operands.
	return vuzp1q_s8(low, high);
#endif
}
|
||||
|
||||
// Narrow signed 32-bit lanes to signed 16-bit lanes using signed saturation.
// Lanes from `low` fill the lower half of the result, lanes from `high` fill
// the upper half.
inline v128 gv_packss_s32(const v128& low, const v128& high)
{
#if defined(ARCH_X64)
	const auto packed = _mm_packs_epi32(low, high);
	return packed;
#elif defined(ARCH_ARM64)
	const auto lower_half = vqmovn_s32(low);
	const auto upper_half = vqmovn_s32(high);
	return vcombine_s16(lower_half, upper_half);
#endif
}
|
||||
|
||||
// Narrow signed 32-bit lanes to unsigned 16-bit lanes using unsigned saturation
// (negative inputs become 0, inputs above 0xffff become 0xffff). Lanes from
// `low` fill the lower half of the result, lanes from `high` fill the upper half.
inline v128 gv_packus_s32(const v128& low, const v128& high)
{
#if defined(__SSE4_1__)
	const auto packed = _mm_packus_epi32(low, high);
	return packed;
#elif defined(ARCH_X64)
	// Emulation using only the signed-saturating pack:
	// 1) all-ones mask for every lane whose input was negative
	const auto signed_pack = _mm_packs_epi32(low, high);
	const auto negative_mask = _mm_srai_epi16(signed_pack, 15);

	// 2) shift the range down by 0x8000, pack with signed saturation,
	//    then shift the 16-bit result back up by 0x8000
	const auto lower_biased = gv_sub32(low, gv_bcst32(0x8000));
	const auto upper_biased = gv_sub32(high, gv_bcst32(0x8000));
	const auto biased_pack = _mm_packs_epi32(lower_biased, upper_biased);
	const auto unbiased = gv_add16(biased_pack, gv_bcst16(0x8000));

	// 3) force negative-input lanes to zero
	return gv_andn(negative_mask, unbiased);
#elif defined(ARCH_ARM64)
	const auto lower_half = vqmovun_s32(low);
	const auto upper_half = vqmovun_s32(high);
	return vcombine_u16(lower_half, upper_half);
#endif
}
|
||||
|
||||
// Narrow unsigned 32-bit lanes to unsigned 16-bit lanes using unsigned
// saturation. Lanes from `low` fill the lower half of the result, lanes from
// `high` fill the upper half.
inline v128 gv_packus_u32(const v128& low, const v128& high)
{
#if defined(__SSE4_1__)
	// Clamp as unsigned first: the pack instruction interprets its inputs as signed.
	const auto limit = _mm_set1_epi32(0xffff);
	const auto lower_clamped = _mm_min_epu32(low, limit);
	const auto upper_clamped = _mm_min_epu32(high, limit);
	return _mm_packus_epi32(lower_clamped, upper_clamped);
#elif defined(ARCH_X64)
	// Overflow mask: all-ones for each lane whose upper 16 bits are non-zero.
	const auto lower_top = _mm_srli_epi32(low, 16);
	const auto upper_top = _mm_srli_epi32(high, 16);
	const v128 overflow = _mm_cmpgt_epi16(_mm_packs_epi32(lower_top, upper_top), _mm_setzero_si128());

	// Truncated value: sign-extend the low 16 bits so the signed pack keeps them intact.
	const auto lower_bottom = _mm_srai_epi32(_mm_slli_epi32(low, 16), 16);
	const auto upper_bottom = _mm_srai_epi32(_mm_slli_epi32(high, 16), 16);
	const v128 truncated = _mm_packs_epi32(lower_bottom, upper_bottom);

	// Overflowing lanes become 0xffff, the rest keep their truncated value.
	return _mm_or_si128(truncated, overflow);
#elif defined(ARCH_ARM64)
	const auto lower_half = vqmovn_u32(low);
	const auto upper_half = vqmovn_u32(high);
	return vcombine_u16(lower_half, upper_half);
#endif
}
|
||||
|
||||
// Narrow 32-bit lanes to 16-bit lanes by truncation (keeps the low 16 bits of
// each lane, no saturation). Lanes from `low` fill the lower half of the
// result, lanes from `high` fill the upper half.
inline v128 gv_packtu32(const v128& low, const v128& high)
{
#if defined(__SSE4_1__)
	// Masking to the low half first guarantees the saturating pack never clamps.
	const auto half_mask = _mm_set1_epi32(0xffff);
	return _mm_packus_epi32(low & half_mask, high & half_mask);
#elif defined(ARCH_X64)
	// Sign-extend the low 16 bits so the signed-saturating pack passes them through.
	const auto lower_bottom = _mm_srai_epi32(_mm_slli_epi32(low, 16), 16);
	const auto upper_bottom = _mm_srai_epi32(_mm_slli_epi32(high, 16), 16);
	return _mm_packs_epi32(lower_bottom, upper_bottom);
#elif defined(ARCH_ARM64)
	// Picks the even-indexed 16-bit elements of both operands.
	return vuzp1q_s16(low, high);
#endif
}
|
||||
|
||||
inline v128 gv_unpacklo8(const v128& lows, const v128& highs)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue