mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-05 06:21:26 +12:00
arm64: implement pshufb intrinsic
This commit is contained in:
parent
cc1e4d2194
commit
84a785ea67
2 changed files with 15 additions and 5 deletions
|
@ -32,7 +32,21 @@ cpu_translator::cpu_translator(llvm::Module* _module, bool is_be)
|
||||||
|
|
||||||
if (m_use_ssse3)
|
if (m_use_ssse3)
|
||||||
{
|
{
|
||||||
|
#if defined(ARCH_X64)
|
||||||
return m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_ssse3_pshuf_b_128), {data0, index});
|
return m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_ssse3_pshuf_b_128), {data0, index});
|
||||||
|
#elif defined(ARCH_ARM64)
|
||||||
|
// Modified from sse2neon
|
||||||
|
// movi v2.16b, #143
|
||||||
|
// and v1.16b, v1.16b, v2.16b
|
||||||
|
// tbl v0.16b, { v0.16b }, v1.16b
|
||||||
|
auto mask = llvm::ConstantInt::get(get_type<u8[16]>(), 0x8F);
|
||||||
|
auto and_mask = llvm::ConstantInt::get(get_type<bool[16]>(), true);
|
||||||
|
auto vec_len = llvm::ConstantInt::get(get_type<u32>(), 16);
|
||||||
|
auto index_masked = m_ir->CreateCall(get_intrinsic<u8[16]>(llvm::Intrinsic::vp_and), {index, mask, and_mask, vec_len});
|
||||||
|
return m_ir->CreateCall(get_intrinsic<u8[16]>(llvm::Intrinsic::aarch64_neon_tbl1), {data0, index_masked});
|
||||||
|
#else
|
||||||
|
#error "Unimplemented"
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -2896,12 +2896,8 @@ protected:
|
||||||
bool m_is_be;
|
bool m_is_be;
|
||||||
|
|
||||||
// Allow PSHUFB intrinsic
|
// Allow PSHUFB intrinsic
|
||||||
#ifdef ARCH_X64
|
|
||||||
bool m_use_ssse3 = true;
|
bool m_use_ssse3 = true;
|
||||||
#else
|
|
||||||
// TODO: fix the pshufb arm64 native impl using TBL instruction
|
|
||||||
bool m_use_ssse3 = false;
|
|
||||||
#endif
|
|
||||||
// Allow FMA
|
// Allow FMA
|
||||||
bool m_use_fma = false;
|
bool m_use_fma = false;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue