mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-13 10:18:40 +12:00
SPU: fix Giga mode (kinda)
Don't scan before the entry point. Disable stack mirror in SPU LLVM for Giga mode (it remains enabled only in Mega mode). Improve analyser logic for holes.
This commit is contained in:
parent
c481472faf
commit
3753d27aba
3 changed files with 58 additions and 41 deletions
|
@ -144,7 +144,7 @@ spu_function_t spu_recompiler::compile(u64 last_reset_count, const std::vector<u
|
|||
m_pos = func[0];
|
||||
m_base = func[0];
|
||||
m_size = ::size32(func) * 4 - 4;
|
||||
const u32 start = m_pos * (g_cfg.core.spu_block_size != spu_block_size_type::giga);
|
||||
const u32 start = m_pos;
|
||||
const u32 end = start + m_size;
|
||||
|
||||
// Create block labels
|
||||
|
@ -226,7 +226,7 @@ spu_function_t spu_recompiler::compile(u64 last_reset_count, const std::vector<u
|
|||
c->vzeroupper();
|
||||
}
|
||||
}
|
||||
else if (m_size == 8 && (g_cfg.core.spu_block_size != spu_block_size_type::giga || func[0] != 4))
|
||||
else if (m_size == 8)
|
||||
{
|
||||
c->mov(x86::rax, static_cast<u64>(func[2]) << 32 | func[1]);
|
||||
c->cmp(x86::rax, x86::qword_ptr(*ls, *pc0));
|
||||
|
@ -237,9 +237,9 @@ spu_function_t spu_recompiler::compile(u64 last_reset_count, const std::vector<u
|
|||
c->vzeroupper();
|
||||
}
|
||||
}
|
||||
else if (m_size == 8 || m_size == 4)
|
||||
else if (m_size == 4)
|
||||
{
|
||||
c->cmp(x86::dword_ptr(*ls, *pc0), +func.back());
|
||||
c->cmp(x86::dword_ptr(*ls, *pc0), func[1]);
|
||||
c->jnz(label_diff);
|
||||
|
||||
if (utils::has_avx())
|
||||
|
@ -724,7 +724,7 @@ spu_function_t spu_recompiler::compile(u64 last_reset_count, const std::vector<u
|
|||
// Acknowledge success and add statistics
|
||||
c->add(SPU_OFF_64(block_counter), ::size32(words) / (words_align / 4));
|
||||
|
||||
if (g_cfg.core.spu_block_size == spu_block_size_type::giga && m_pos != start)
|
||||
if (m_pos != start)
|
||||
{
|
||||
// Jump to the entry point if necessary
|
||||
c->jmp(instr_labels[m_pos]);
|
||||
|
@ -971,7 +971,7 @@ void spu_recompiler::branch_fixed(u32 target)
|
|||
return;
|
||||
}
|
||||
|
||||
const auto ppptr = g_cfg.core.spu_block_size == spu_block_size_type::giga || !g_cfg.core.spu_verification ? nullptr : m_spurt->make_branch_patchpoint();
|
||||
const auto ppptr = !g_cfg.core.spu_verification ? nullptr : m_spurt->make_branch_patchpoint();
|
||||
|
||||
c->lea(addr->r64(), get_pc(target));
|
||||
c->mov(SPU_OFF_32(pc), *addr);
|
||||
|
@ -1088,7 +1088,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
|
|||
}
|
||||
|
||||
// Simply external call (return or indirect call)
|
||||
const auto ppptr = g_cfg.core.spu_block_size == spu_block_size_type::giga || !g_cfg.core.spu_verification ? nullptr : m_spurt->make_branch_patchpoint();
|
||||
const auto ppptr = !g_cfg.core.spu_verification ? nullptr : m_spurt->make_branch_patchpoint();
|
||||
|
||||
if (ppptr)
|
||||
{
|
||||
|
|
|
@ -226,6 +226,12 @@ std::deque<std::vector<u32>> spu_cache::get()
|
|||
break;
|
||||
}
|
||||
|
||||
if (!size || !func[1])
|
||||
{
|
||||
// Skip old format Giga entries
|
||||
continue;
|
||||
}
|
||||
|
||||
result.emplace_front(std::move(func));
|
||||
}
|
||||
|
||||
|
@ -349,7 +355,7 @@ void spu_cache::initialize()
|
|||
}
|
||||
|
||||
// Get data start
|
||||
const u32 start = func[0] * (g_cfg.core.spu_block_size != spu_block_size_type::giga);
|
||||
const u32 start = func[0];
|
||||
const u32 size0 = ::size32(func);
|
||||
|
||||
// Initialize LS with function data only
|
||||
|
@ -448,7 +454,7 @@ bool spu_runtime::func_compare::operator()(const std::vector<u32>& lhs, const st
|
|||
else if (rhs_data.empty())
|
||||
return false;
|
||||
|
||||
if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
|
||||
if (false)
|
||||
{
|
||||
// In Giga mode, compare instructions starting from the entry point first
|
||||
lhs_data.remove_prefix(lhs_addr / 4);
|
||||
|
@ -507,7 +513,7 @@ bool spu_runtime::add(u64 last_reset_count, void* _where, spu_function_t compile
|
|||
const std::vector<u32>& func = where.first;
|
||||
|
||||
//
|
||||
const u32 _off = 1 + (func[0] / 4) * (g_cfg.core.spu_block_size == spu_block_size_type::giga);
|
||||
const u32 _off = 1 + (func[0] / 4) * (false);
|
||||
|
||||
// Set pointer to the compiled function
|
||||
where.second = compiled;
|
||||
|
@ -515,23 +521,27 @@ bool spu_runtime::add(u64 last_reset_count, void* _where, spu_function_t compile
|
|||
// Register function in PIC map
|
||||
m_pic_map[{func.data() + _off, func.size() - _off}] = compiled;
|
||||
|
||||
// Prepare sorted list
|
||||
m_flat_list.clear();
|
||||
m_flat_list.assign(m_pic_map.cbegin(), m_pic_map.cend());
|
||||
|
||||
struct work
|
||||
{
|
||||
u32 size;
|
||||
u16 from;
|
||||
u16 level;
|
||||
u8* rel32;
|
||||
decltype(m_pic_map)::iterator beg;
|
||||
decltype(m_pic_map)::iterator end;
|
||||
decltype(m_flat_list)::iterator beg;
|
||||
decltype(m_flat_list)::iterator end;
|
||||
};
|
||||
|
||||
// Scratch vector
|
||||
static thread_local std::vector<work> workload;
|
||||
|
||||
// Generate a dispatcher (übertrampoline)
|
||||
const auto beg = m_pic_map.begin();
|
||||
const auto _end = m_pic_map.end();
|
||||
const u32 size0 = ::size32(m_pic_map);
|
||||
const auto beg = m_flat_list.begin();
|
||||
const auto _end = m_flat_list.end();
|
||||
const u32 size0 = ::size32(m_flat_list);
|
||||
|
||||
if (size0 == 1)
|
||||
{
|
||||
|
@ -630,6 +640,19 @@ bool spu_runtime::add(u64 last_reset_count, void* _where, spu_function_t compile
|
|||
{
|
||||
// Cannot split: some functions contain holes at this level
|
||||
w.level++;
|
||||
|
||||
// Resort subrange starting from the new level
|
||||
std::stable_sort(w.beg, w.end, [&](const auto& a, const auto& b)
|
||||
{
|
||||
std::basic_string_view<u32> lhs = a.first;
|
||||
std::basic_string_view<u32> rhs = b.first;
|
||||
|
||||
lhs.remove_prefix(w.level);
|
||||
rhs.remove_prefix(w.level);
|
||||
|
||||
return lhs < rhs;
|
||||
});
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -662,7 +685,7 @@ bool spu_runtime::add(u64 last_reset_count, void* _where, spu_function_t compile
|
|||
if (w.level >= w.beg->first.size() || w.level >= it->first.size())
|
||||
{
|
||||
// If functions cannot be compared, assume smallest function
|
||||
LOG_FATAL(SPU, "Trampoline simplified at 0x%x (level=%u)", func[0], w.level);
|
||||
LOG_ERROR(SPU, "Trampoline simplified at 0x%x (level=%u)", func[0], w.level);
|
||||
make_jump(0xe9, w.beg->second); // jmp rel32
|
||||
continue;
|
||||
}
|
||||
|
@ -671,13 +694,13 @@ bool spu_runtime::add(u64 last_reset_count, void* _where, spu_function_t compile
|
|||
const u32 x = it->first.at(w.level);
|
||||
|
||||
// Adjust ranges (backward)
|
||||
while (it != m_pic_map.begin())
|
||||
while (it != m_flat_list.begin())
|
||||
{
|
||||
it--;
|
||||
|
||||
if (w.level >= it->first.size())
|
||||
{
|
||||
it = m_pic_map.end();
|
||||
it = m_flat_list.end();
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -692,9 +715,9 @@ bool spu_runtime::add(u64 last_reset_count, void* _where, spu_function_t compile
|
|||
size2++;
|
||||
}
|
||||
|
||||
if (it == m_pic_map.end())
|
||||
if (it == m_flat_list.end())
|
||||
{
|
||||
LOG_FATAL(SPU, "Trampoline simplified (II) at 0x%x (level=%u)", func[0], w.level);
|
||||
LOG_ERROR(SPU, "Trampoline simplified (II) at 0x%x (level=%u)", func[0], w.level);
|
||||
make_jump(0xe9, w.beg->second); // jmp rel32
|
||||
continue;
|
||||
}
|
||||
|
@ -824,7 +847,7 @@ void* spu_runtime::find(u64 last_reset_count, const std::vector<u32>& func)
|
|||
}
|
||||
|
||||
//
|
||||
const u32 _off = 1 + (func[0] / 4) * (g_cfg.core.spu_block_size == spu_block_size_type::giga);
|
||||
const u32 _off = 1 + (func[0] / 4) * (false);
|
||||
|
||||
// Try to find PIC first
|
||||
const auto found = m_pic_map.find({func.data() + _off, func.size() - _off});
|
||||
|
@ -1154,8 +1177,6 @@ const std::vector<u32>& spu_recompiler_base::analyse(const be_t<u32>* ls, u32 en
|
|||
|
||||
if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
|
||||
{
|
||||
// In Giga mode, all data starts from the address 0
|
||||
lsa = 0;
|
||||
}
|
||||
|
||||
for (u32 wi = 0, wa = workload[0]; wi < workload.size();)
|
||||
|
@ -1842,7 +1863,7 @@ const std::vector<u32>& spu_recompiler_base::analyse(const be_t<u32>* ls, u32 en
|
|||
}
|
||||
}
|
||||
|
||||
while (g_cfg.core.spu_block_size != spu_block_size_type::giga || limit < 0x40000)
|
||||
while (lsa > 0 || limit < 0x40000)
|
||||
{
|
||||
const u32 initial_size = result.size();
|
||||
|
||||
|
@ -2032,14 +2053,6 @@ const std::vector<u32>& spu_recompiler_base::analyse(const be_t<u32>* ls, u32 en
|
|||
continue;
|
||||
}
|
||||
|
||||
// Erase unreachable targets
|
||||
const auto new_end = std::remove_if(it->second.begin(), it->second.end(), [&](u32 addr)
|
||||
{
|
||||
return addr < lsa || addr >= limit;
|
||||
});
|
||||
|
||||
it->second.erase(new_end, it->second.end());
|
||||
|
||||
it++;
|
||||
}
|
||||
|
||||
|
@ -3013,7 +3026,7 @@ const std::vector<u32>& spu_recompiler_base::analyse(const be_t<u32>* ls, u32 en
|
|||
|
||||
if (f.second.good)
|
||||
{
|
||||
LOG_ERROR(SPU, "Function 0x%05x: calls bad function (0x%05x)", f.first, ffound->first);
|
||||
LOG_ERROR(SPU, "Function 0x%05x: calls bad function (0x%05x)", f.first, call);
|
||||
f.second.good = false;
|
||||
}
|
||||
}
|
||||
|
@ -3040,7 +3053,7 @@ void spu_recompiler_base::dump(std::string& out)
|
|||
SPUDisAsm dis_asm(CPUDisAsm_InterpreterMode);
|
||||
dis_asm.offset = reinterpret_cast<const u8*>(result.data() + 1);
|
||||
|
||||
if (g_cfg.core.spu_block_size != spu_block_size_type::giga)
|
||||
if (true)
|
||||
{
|
||||
dis_asm.offset -= result[0];
|
||||
}
|
||||
|
@ -3299,15 +3312,15 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||
// Create tail call to the function chunk (non-tail calls are just out of question)
|
||||
void tail_chunk(llvm::Value* chunk, llvm::Value* base_pc = nullptr)
|
||||
{
|
||||
if (!chunk && (g_cfg.core.spu_block_size == spu_block_size_type::giga || !g_cfg.core.spu_verification))
|
||||
if (!chunk && !g_cfg.core.spu_verification)
|
||||
{
|
||||
// Disable patchpoints in some cases
|
||||
// Disable patchpoints if verification is disabled
|
||||
chunk = m_dispatch;
|
||||
}
|
||||
else if (!chunk)
|
||||
{
|
||||
// Create branch patchpoint if chunk == nullptr
|
||||
verify(HERE), m_finfo, !m_finfo->fn;
|
||||
verify(HERE), m_finfo, !m_finfo->fn || m_function == m_finfo->chunk;
|
||||
|
||||
// Register under a unique linkable name
|
||||
const std::string ppname = fmt::format("%s-pp-0x%05x", m_hash, m_pos);
|
||||
|
@ -4111,7 +4124,7 @@ public:
|
|||
m_pos = func[0];
|
||||
m_base = func[0];
|
||||
m_size = (func.size() - 1) * 4;
|
||||
const u32 start = m_pos * (g_cfg.core.spu_block_size != spu_block_size_type::giga);
|
||||
const u32 start = m_pos;
|
||||
const u32 end = start + m_size;
|
||||
|
||||
if (g_cfg.core.spu_debug)
|
||||
|
@ -4169,7 +4182,7 @@ public:
|
|||
const auto cond = m_ir->CreateICmpNE(m_ir->CreateLoad(pu32), m_ir->getInt32(func[1]));
|
||||
m_ir->CreateCondBr(cond, label_diff, label_body, m_md_unlikely);
|
||||
}
|
||||
else if (func.size() - 1 == 2 && g_cfg.core.spu_block_size != spu_block_size_type::giga)
|
||||
else if (func.size() - 1 == 2)
|
||||
{
|
||||
const auto pu64 = m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, m_base_pc), get_type<u64*>());
|
||||
const auto cond = m_ir->CreateICmpNE(m_ir->CreateLoad(pu64), m_ir->getInt64(static_cast<u64>(func[2]) << 32 | func[1]));
|
||||
|
@ -5617,6 +5630,7 @@ public:
|
|||
m_ir->CreateUnreachable();
|
||||
m_ir->SetInsertPoint(next);
|
||||
m_ir->CreateStore(ci, spu_ptr<u8>(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::cmd));
|
||||
update_pc();
|
||||
call("spu_exec_mfc_cmd", &exec_mfc_cmd, m_thread);
|
||||
return;
|
||||
}
|
||||
|
@ -7698,7 +7712,7 @@ public:
|
|||
m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));
|
||||
const auto type = m_finfo->chunk->getFunctionType()->getPointerTo()->getPointerTo();
|
||||
|
||||
if (ret && g_cfg.core.spu_block_size != spu_block_size_type::safe)
|
||||
if (ret && g_cfg.core.spu_block_size == spu_block_size_type::mega)
|
||||
{
|
||||
// Compare address stored in stack mirror with addr
|
||||
const auto stack0 = eval(zext<u64>(sp) + ::offset32(&spu_thread::stack_mirror));
|
||||
|
@ -8089,7 +8103,7 @@ public:
|
|||
return;
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_block_size != spu_block_size_type::safe && m_block_info[m_pos / 4 + 1] && m_entry_info[m_pos / 4 + 1])
|
||||
if (g_cfg.core.spu_block_size == spu_block_size_type::mega && m_block_info[m_pos / 4 + 1] && m_entry_info[m_pos / 4 + 1])
|
||||
{
|
||||
// Store the return function chunk address at the stack mirror
|
||||
const auto pfunc = add_function(m_pos + 4);
|
||||
|
|
|
@ -59,6 +59,9 @@ class spu_runtime
|
|||
// Debug module output location
|
||||
std::string m_cache_path;
|
||||
|
||||
// Scratch vector
|
||||
std::vector<std::pair<std::basic_string_view<u32>, spu_function_t>> m_flat_list;
|
||||
|
||||
public:
|
||||
|
||||
// Trampoline to spu_recompiler_base::dispatch
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue