This commit is contained in:
Nekotekina 2016-06-07 23:24:20 +03:00
parent 6fa5e2cc7c
commit a8bebcba55
31 changed files with 7346 additions and 213 deletions

View file

@ -13,8 +13,11 @@
#include "Emu/Cell/lv2/sys_prx.h"
#include <unordered_set>
#include <algorithm>
#include "yaml-cpp/yaml.h"
const ppu_decoder<ppu_itype> s_ppu_itype;
//const ppu_decoder<ppu_iname> s_ppu_iname;
LOG_CHANNEL(cellAdec);
LOG_CHANNEL(cellAtrac);
@ -117,6 +120,8 @@ extern std::string ppu_get_variable_name(const std::string& module, u32 vnid);
extern void sys_initialize_tls(PPUThread&, u64, u32, u32, u32);
extern void ppu_initialize(const std::string& name, const std::vector<std::pair<u32, u32>>& set, u32 entry);
// Function lookup table. Not supposed to grow after emulation start.
std::vector<ppu_function_t> g_ppu_function_cache;
@ -138,20 +143,14 @@ extern void ppu_execute_function(PPUThread& ppu, u32 index)
{
func(ppu);
}
catch (EmulationStopped)
{
LOG_WARNING(PPU, "Function '%s' aborted", ppu.last_function);
ppu.last_function = previous_function;
throw;
}
catch (...)
{
LOG_ERROR(PPU, "Function '%s' aborted", ppu.last_function);
logs::HLE.format(Emu.IsStopped() ? logs::level::warning : logs::level::error, "Function '%s' aborted", ppu.last_function);
ppu.last_function = previous_function;
throw;
}
LOG_TRACE(PPU, "Function '%s' finished, r3=0x%llx", ppu.last_function, ppu.GPR[3]);
LOG_TRACE(HLE, "Function '%s' finished, r3=0x%llx", ppu.last_function, ppu.GPR[3]);
ppu.last_function = previous_function;
return;
}
@ -657,12 +656,16 @@ static auto ppu_load_exports(const std::shared_ptr<ppu_linkage_info>& link, u32
return result;
}
static void ppu_load_imports(const std::shared_ptr<ppu_linkage_info>& link, u32 imports_start, u32 imports_end)
static u32 ppu_load_imports(const std::shared_ptr<ppu_linkage_info>& link, u32 imports_start, u32 imports_end)
{
u32 result = imports_start;
for (u32 addr = imports_start; addr < imports_end;)
{
const auto& lib = vm::_ref<const ppu_prx_module_info>(addr);
result = std::min<u32>(result, lib.name.addr());
const std::string module_name(lib.name.get_ptr());
LOG_NOTICE(LOADER, "** Imported module '%s' (0x%x, 0x%x)", module_name, lib.unk4, lib.unk5);
@ -720,12 +723,404 @@ static void ppu_load_imports(const std::shared_ptr<ppu_linkage_info>& link, u32
addr += lib.size ? lib.size : sizeof(ppu_prx_module_info);
}
return result;
}
// Test whether the words beginning at `start_ptr` form a jumptable.
// Every entry is treated as an offset from the table's own address; an entry is accepted
// while the resulting address is 4-byte aligned and lies inside [start, end).
// Returns the highest branch address found, or 0 if fewer than 8 bytes remain before `end`
// or fewer than two entries were accepted. On a non-zero-entry result `start_ptr` is moved
// to the first word that does not belong to the table (or to `end`).
never_inline static u32 ppu_is_jumptable(vm::ptr<u32>& start_ptr, u32 start, u32 end)
{
	// Not enough room left for two entries
	if (end - start_ptr.addr() < 8)
	{
		return 0;
	}

	// Offsets are relative to the first table entry
	const u32 base = start_ptr.addr();

	u32 highest = 0;

	vm::ptr<u32> cur = start_ptr;

	while (cur.addr() < end)
	{
		const u32 target = base + *cur;

		const bool acceptable = !(target % 4) && target >= start && target < end;

		if (!acceptable)
		{
			// A table must contain at least two entries
			if (cur - start_ptr < 2)
			{
				return 0;
			}

			// Report where the table stops
			start_ptr = cur;
			return highest;
		}

		highest = std::max<u32>(highest, target);
		cur++;
	}

	// The table extends up to the end of the analysed range
	start_ptr = vm::cast(end);
	return highest;
}
// Guess whether the function cannot be divided at specific position `split`.
// `start` and `end` delimit the candidate range, `split` is the proposed boundary inside it.
// Returns true if the part before `split` appears connected to the part after it
// (fall-through, a non-linking branch across the boundary, or a jumptable reaching it),
// false if no such connection was detected.
static bool ppu_is_coherent(u32 start, u32 end, u32 split)
{
// Check if the block before `split` is directly connected (can fall through)
// The scan walks backwards from the word immediately preceding `split`.
for (vm::ptr<u32> rptr = vm::cast(split - 4);; rptr--)
{
const u32 _last = *rptr;
// Skip NOPs
if (_last == ppu_instructions::NOP())
{
// Only NOPs all the way back to `start`: reported as coherent
if (rptr.addr() == start) return true;
continue;
}
// Classify the last non-NOP instruction; anything not listed below is assumed to fall through
switch (const auto type = s_ppu_itype.decode(_last))
{
// Undecodable words and trap instructions are not counted as falling through
case ppu_itype::UNK:
case ppu_itype::TD:
case ppu_itype::TDI:
case ppu_itype::TW:
case ppu_itype::TWI:
{
break;
}
// Direct branch: execution continues past it only if it is a call (LK set)
case ppu_itype::B:
{
if (ppu_opcode_t{_last}.lk) return true;
break;
}
// Falls through if it links, or if both BO bits 0x10 and 0x04 are not set
// (presumably the "branch always" encoding of the Power ISA - TODO confirm)
case ppu_itype::BC:
case ppu_itype::BCLR:
{
if (ppu_opcode_t{_last}.lk || (ppu_opcode_t{_last}.bo & 0x14) != 0x14) return true;
break;
}
// Falls through if it links, or if BO bit 0x10 is clear (presumably a conditional branch - TODO confirm)
case ppu_itype::BCCTR:
{
if (ppu_opcode_t{_last}.lk || (ppu_opcode_t{_last}.bo & 0x10) == 0) return true;
break;
}
default:
{
return true;
}
}
// Reached only via `break` inside the switch: stop the backward scan
break;
}
// Find branches from one part to another
for (vm::ptr<u32> ptr = vm::cast(start); ptr.addr() < split; ptr++)
{
const u32 value = *ptr;
const auto type = s_ppu_itype.decode(value);
const ppu_opcode_t op{value};
if (type == ppu_itype::B || type == ppu_itype::BC)
{
// Base is 0 when op.aa is set, otherwise the address of the branch itself
const u32 target = ppu_branch_target(op.aa ? 0 : ptr.addr(), type == ppu_itype::B ? +op.ll : +op.simm16);
if (target % 4 == 0 && target >= split && target < end)
{
// The first branch across the boundary decides the result:
// coherent for a plain branch, not coherent for a linking one
return !op.lk;
}
}
if (type == ppu_itype::BCCTR && !op.lk)
{
// Probe for a jumptable right after the instruction; the helper may advance `ptr` past the table
const u32 max = ppu_is_jumptable(++ptr, start, end);
if (max && max >= split)
{
return true;
}
// Compensate for the loop increment so the scan resumes at the word `ptr` now points to
ptr--;
}
}
// TODO: ???
return false;
}
// Analyse the code range [start, end) and collect function and block entries.
// `segs` is a list of (address, size) segments which are scanned for pointers into the range;
// `rtoc` is the value expected in the second word of the first OPD entry.
// Returns (addr, size) pairs: detected functions in ascending address order,
// followed by block entries stored with size 0.
static std::vector<std::pair<u32, u32>> ppu_analyse(u32 start, u32 end, const std::vector<std::pair<u32, u32>>& segs, u32 rtoc)
{
// Function entries (except the last one)
// `end` is stored as a terminating element so every function has an upper bound
std::set<u32> result
{
end,
};
// Instruction usage stats
//std::unordered_map<const char*, u64> stats;
// Jumptable entries (addr->size)
std::unordered_map<u32, u32> jts;
// Block entries
std::set<u32> blocks;
// First pass; Detect branch + link instructions
for (vm::ptr<u32> ptr = vm::cast(start); ptr.addr() < end; ptr++)
{
const u32 value = *ptr;
const auto type = s_ppu_itype.decode(value);
//const auto name = s_ppu_iname.decode(value);
const ppu_opcode_t op{value};
if (type == ppu_itype::B || type == ppu_itype::BC)
{
// Base is 0 when op.aa is set, otherwise the address of the branch itself
const u32 target = ppu_branch_target(op.aa ? 0 : ptr.addr(), type == ppu_itype::B ? +op.ll : +op.simm16);
// Linking branch to another aligned address inside the range: function entry
if (op.lk && target % 4 == 0 && target >= start && target < end && target != ptr.addr())
{
LOG_NOTICE(PPU, "BCall: 0x%x -> 0x%x", ptr, target);
result.emplace(target);
}
// Non-linking branch inside the range: block entry
if (!op.lk && target % 4 == 0 && target >= start && target < end)
{
blocks.emplace(target);
}
}
if (type == ppu_itype::BCCTR && !op.lk)
{
// Remember where a possible jumptable begins; the helper advances `ptr` past it on success
const auto jt = ++ptr;
if (ppu_is_jumptable(ptr, start, end))
{
LOG_NOTICE(PPU, "JTable: 0x%x .. 0x%x", jt, ptr);
jts.emplace(jt.addr(), ptr.addr() - jt.addr());
// Each table entry is an offset from the table address
for (auto _ptr = jt; _ptr != ptr; _ptr++)
{
blocks.emplace(jt.addr() + *_ptr);
}
}
else
{
LOG_NOTICE(PPU, "BCCTR: 0x%x", ptr - 1);
}
// Compensate for the loop increment
ptr--;
}
//stats[name]++;
}
// Find OPD table
for (const auto& seg : segs)
{
for (vm::ptr<u32> ptr = vm::cast(seg.first); ptr.addr() < seg.first + seg.second; ptr++)
{
// Candidate first entry: aligned address inside the range followed by the expected rtoc value
if (ptr[0] >= start && ptr[0] < end && ptr[0] % 4 == 0 && ptr[1] == rtoc)
{
// Consume consecutive two-word entries; addresses of known jumptables are rejected
// NOTE(review): this inner loop is not bounded by the segment size - confirm this is intended
while (ptr[0] >= start && ptr[0] < end && ptr[0] % 4 == 0 && !jts.count(ptr[0]) /*&& ptr[1] == rtoc*/)
{
LOG_NOTICE(PPU, "OPD: 0x%x -> 0x%x (rtoc=0x%x)", ptr, ptr[0], ptr[1]);
result.emplace(ptr[0]);
ptr += 2;
}
// Only the first table found in each segment is processed
break;
}
}
}
// Find more block entries
for (const auto& seg : segs)
{
for (vm::ptr<u32> ptr = vm::cast(seg.first); ptr.addr() < seg.first + seg.second; ptr++)
{
// Any aligned word that looks like an address inside the range is recorded as a block entry
const u32 value = *ptr;
if (value % 4 == 0 && value >= start && value < end)
{
blocks.emplace(value);
}
}
}
// Detect tail calls
// Work queue of function entries to (re)scan; new entries are appended while processing
std::deque<u32> task{result.begin(), result.end()};
while (!task.empty())
{
const u32 f_start = task.front();
// Next known entry after f_start bounds the scanned function
const auto f_up = result.upper_bound(f_start);
if (f_up != result.end()) for (vm::ptr<u32> ptr = vm::cast(f_start); ptr.addr() < *f_up; ptr++)
{
const u32 value = *ptr;
const auto type = s_ppu_itype.decode(value);
const ppu_opcode_t op{value};
if (type == ppu_itype::B || type == ppu_itype::BC)
{
const u32 target = ppu_branch_target(op.aa ? 0 : ptr.addr(), type == ppu_itype::B ? +op.ll : +op.simm16);
// Non-linking branch which leaves the current function but stays inside the range
if (!op.lk && target % 4 == 0 && target >= start && target < end && (target < f_start || target >= *f_up))
{
auto _lower = result.lower_bound(target);
// Target is already a known entry, or no entry precedes it
if (*_lower == target || _lower == result.begin())
{
continue;
}
// Function which currently contains the target
const u32 f2_end = *_lower;
const u32 f2_start = *--_lower;
// Do not split a function that appears connected across the target
if (ppu_is_coherent(f2_start, f2_end, target))
{
continue;
}
LOG_NOTICE(LOADER, "Tail call: 0x%x -> 0x%x", ptr, target);
result.emplace(target);
// Rescan two new functions if the insertion took place
task.push_back(target);
task.push_back(f2_start);
}
}
}
task.pop_front();
}
// Fill (addr, size) vector
std::vector<std::pair<u32, u32>> vr;
// The local `end` iterator shadows the `end` parameter; it excludes the last (terminating) element
for (auto it = result.begin(), end = --result.end(); it != end; it++)
{
const u32 addr = *it;
const auto _up = result.upper_bound(addr);
// Set initial (addr, size)
vr.emplace_back(std::make_pair(addr, *_up - addr));
// Analyse function against its end
for (u32& size = vr.back().second; size;)
{
// Entry following the current end of the function, if any
const auto next = result.upper_bound(addr + size);
if (next != result.end() && ppu_is_coherent(addr, *next, addr + size))
{
// Extend and check again
const u32 new_size = *next - addr;
LOG_NOTICE(LOADER, "Extended: 0x%x (0x%x --> 0x%x)", addr, size, new_size);
size = new_size;
continue;
}
break;
}
}
// Add blocks as (addr, 0) // TODO
for (auto value : blocks)
{
vr.emplace_back(std::make_pair(value, 0));
}
// Print some stats
//{
// std::multimap<u64, const char*, std::greater<u64>> sorted;
// for (const auto& pair : stats)
// sorted.emplace(pair.second, pair.first);
// for (const auto& stat : sorted)
// LOG_NOTICE(PPU, "Stats: (%llu) %s", stat.first, stat.second);
//}
return vr;
}
// Compare the detected function list with an optional description file "<fname>.yml".
// `funcs` holds detected (addr, size) pairs; `reloc` is subtracted from every detected
// address before it is compared with the "addr" values listed under "functions" in the file.
// Discrepancies are only logged; nothing happens if the file cannot be opened.
// NOTE(review): YAML::Load presumably throws on malformed input and that is not handled here - confirm.
static void ppu_validate(const std::string& fname, const std::vector<std::pair<u32, u32>>& funcs, u32 reloc)
{
	// Load custom PRX configuration if available
	if (fs::file yml{fname + ".yml"})
	{
		const auto cfg = YAML::Load(yml.to_string());

		// Position of the next detected function which has not been matched yet
		u32 index = 0;

		// Validate detected functions using information provided
		for (const auto func : cfg["functions"])
		{
			// Missing or unreadable fields fall back to -1 (no address) and 0 (no size)
			const u32 addr = func["addr"].as<u32>(-1);
			const u32 size = func["size"].as<u32>(0);

			if (addr != -1 && index < funcs.size())
			{
				u32 found = funcs[index].first - reloc;

				// Skip (and report) detected functions located before the expected address
				while (addr > found && index + 1 < funcs.size())
				{
					LOG_ERROR(LOADER, "%s.yml : validation failed at 0x%x (0x%x, 0x%x)", fname, found, addr, size);
					index++;
					found = funcs[index].first - reloc;
				}

				// Only an exact address match may be compared by size. This also covers the case
				// where the skip loop ran out of detected functions while `addr` is still greater
				// than `found`: the sizes of two unrelated functions must not be compared, and the
				// unmatched detected function must stay available for the final coverage report.
				if (addr != found)
				{
					LOG_ERROR(LOADER, "%s.yml : function not found (0x%x, 0x%x)", fname, addr, size);
					continue;
				}

				// Expected size smaller than detected: warning only
				if (size && size < funcs[index].second)
				{
					LOG_WARNING(LOADER, "%s.yml : function size mismatch at 0x%x(size=0x%x) (0x%x, 0x%x)", fname, found, funcs[index].second, addr, size);
				}

				// Expected size greater than detected: error
				if (size > funcs[index].second)
				{
					LOG_ERROR(LOADER, "%s.yml : function size mismatch at 0x%x(size=0x%x) (0x%x, 0x%x)", fname, found, funcs[index].second, addr, size);
				}

				index++;
			}
			else
			{
				// Entry without an address, or every detected function has already been consumed
				LOG_ERROR(LOADER, "%s.yml : function not found at the end (0x%x, 0x%x)", fname, addr, size);
				break;
			}
		}

		if (!index)
		{
			return; // ???
		}

		// Report remaining detected functions with non-zero size which the file did not mention
		while (index < funcs.size())
		{
			if (funcs[index].second)
			{
				LOG_ERROR(LOADER, "%s.yml : function not covered at 0x%x (size=0x%x)", fname, funcs[index].first, funcs[index].second);
			}

			index++;
		}

		LOG_SUCCESS(LOADER, "%s.yml : validation completed", fname);
	}
}
template<>
std::shared_ptr<lv2_prx_t> ppu_prx_loader::load() const
{
std::vector<u32> segments;
std::vector<std::pair<u32, u32>> segments;
for (const auto& prog : progs)
{
@ -753,7 +1148,7 @@ std::shared_ptr<lv2_prx_t> ppu_prx_loader::load() const
std::memcpy(vm::base(addr), prog.bin.data(), file_size);
LOG_WARNING(LOADER, "**** Loaded to 0x%x (size=0x%x)", addr, mem_size);
segments.push_back(addr);
segments.emplace_back(std::make_pair(addr, mem_size));
}
break;
@ -787,8 +1182,8 @@ std::shared_ptr<lv2_prx_t> ppu_prx_loader::load() const
{
const auto& rel = reinterpret_cast<const ppu_prx_relocation_info&>(prog.bin[i]);
const u32 raddr = vm::cast(segments.at(rel.index_addr) + rel.offset, HERE);
const u64 rdata = segments.at(rel.index_value) + rel.ptr.addr();
const u32 raddr = vm::cast(segments.at(rel.index_addr).first + rel.offset, HERE);
const u64 rdata = segments.at(rel.index_value).first + rel.ptr.addr();
switch (const u32 type = rel.type)
{
@ -853,14 +1248,24 @@ std::shared_ptr<lv2_prx_t> ppu_prx_loader::load() const
};
// Access library information (TODO)
const auto& lib_info = vm::_ref<const ppu_prx_library_info>(vm::cast(segments[0] + progs[0].p_paddr - progs[0].p_offset, HERE));
const auto& lib_name = std::string(lib_info.name);
const auto& lib_info = vm::cptr<ppu_prx_library_info>(vm::cast(segments[0].first + progs[0].p_paddr - progs[0].p_offset, HERE));
const auto& lib_name = std::string(lib_info->name);
LOG_WARNING(LOADER, "Library %s (toc=0x%x, rtoc=0x%x):", lib_name, lib_info.toc, lib_info.toc + segments[0]);
LOG_WARNING(LOADER, "Library %s (rtoc=0x%x):", lib_name, lib_info->toc);
prx->specials = ppu_load_exports(link, lib_info.exports_start, lib_info.exports_end);
prx->specials = ppu_load_exports(link, lib_info->exports_start, lib_info->exports_end);
ppu_load_imports(link, lib_info.imports_start, lib_info.imports_end);
const std::initializer_list<u32> addr_list
{
ppu_load_imports(link, lib_info->imports_start, lib_info->imports_end),
lib_info.addr(),
lib_info->imports_start,
lib_info->exports_start,
};
// Get functions
prx->func = ppu_analyse(segments[0].first, std::min(addr_list), segments, lib_info->toc);
}
else
{
@ -887,9 +1292,18 @@ void ppu_exec_loader::load() const
// Access linkage information object
const auto link = fxm::get_always<ppu_linkage_info>();
// Segment info
std::vector<std::pair<u32, u32>> segments;
// Functions
std::vector<std::pair<u32, u32>> exec_set;
u32 exec_end{};
// Allocate memory at fixed positions
for (const auto& prog : progs)
{
LOG_NOTICE(LOADER, "** Segment: p_type=0x%x, p_vaddr=0x%llx, p_filesz=0x%llx, p_memsz=0x%llx, flags=0x%x", prog.p_type, prog.p_vaddr, prog.p_filesz, prog.p_memsz, prog.p_flags);
const u32 addr = vm::cast(prog.p_vaddr, HERE);
const u32 size = fmt::narrow<u32>("Invalid p_memsz: 0x%llx" HERE, prog.p_memsz);
@ -902,6 +1316,11 @@ void ppu_exec_loader::load() const
throw fmt::exception("vm::falloc() failed (addr=0x%x, memsz=0x%x)", addr, size);
std::memcpy(vm::base(addr), prog.bin.data(), prog.bin.size());
segments.emplace_back(std::make_pair(addr, size));
if (prog.p_flags & 1) // Test EXEC flag
exec_end = addr + size;
}
}
@ -987,13 +1406,19 @@ void ppu_exec_loader::load() const
const auto& proc_prx_param = vm::_ref<const ppu_proc_prx_param_t>(vm::cast(prog.p_vaddr, HERE));
LOG_NOTICE(LOADER, "* libent_start = *0x%x", proc_prx_param.libent_start);
LOG_NOTICE(LOADER, "* libstub_start = *0x%x", proc_prx_param.libstub_start);
if (proc_prx_param.magic != 0x1b434cec)
{
throw fmt::exception("Bad magic! (0x%x)", proc_prx_param.magic);
}
ppu_load_exports(link, proc_prx_param.libent_start, proc_prx_param.libent_end);
ppu_load_imports(link, proc_prx_param.libstub_start, proc_prx_param.libstub_end);
const u32 min_addr = ppu_load_imports(link, proc_prx_param.libstub_start, proc_prx_param.libstub_end);
exec_end = std::min<u32>(min_addr, exec_end);
}
break;
}
@ -1035,10 +1460,16 @@ void ppu_exec_loader::load() const
const auto prx = loader.load();
// Register start function
if (prx->start)
{
start_funcs.push_back(prx->start.addr());
}
// Add functions
exec_set.insert(exec_set.end(), prx->func.begin(), prx->func.end());
ppu_validate(lle_dir + '/' + name, prx->func, prx->func[0].first);
}
else
{
@ -1167,6 +1598,17 @@ void ppu_exec_loader::load() const
}
}
// Analyse executable
const u32 entry_rtoc = vm::read32(vm::cast(header.e_entry, HERE) + 4);
const auto funcs = ppu_analyse(segments[0].first, exec_end, segments, entry_rtoc);
ppu_validate(vfs::get(Emu.GetPath()), funcs, 0);
for (const auto& pair : funcs)
{
exec_set.emplace_back(pair);
}
// TODO: adjust for liblv2 loading option
using namespace ppu_instructions;
@ -1177,7 +1619,7 @@ void ppu_exec_loader::load() const
static const int branch_size = 10 * 4;
auto make_branch = [](vm::ptr<u32>& ptr, u32 addr)
auto make_branch = [](vm::ptr<u32>& ptr, u32 addr, bool last)
{
const u32 stub = vm::read32(addr);
const u32 rtoc = vm::read32(addr + 4);
@ -1189,7 +1631,7 @@ void ppu_exec_loader::load() const
*ptr++ = ORI(r2, r2, rtoc & 0xffff);
*ptr++ = ORIS(r2, r2, rtoc >> 16);
*ptr++ = MTCTR(r0);
*ptr++ = BCTRL();
*ptr++ = last ? BCTR() : BCTRL();
};
auto entry = vm::ptr<u32>::make(vm::alloc(48 + branch_size * (::size32(start_funcs) + 1), vm::main));
@ -1217,7 +1659,7 @@ void ppu_exec_loader::load() const
// Reset arguments (TODO)
*entry++ = LI(r3, 0);
*entry++ = LI(r4, 0);
make_branch(entry, f);
make_branch(entry, f, false);
}
// Restore initialization args
@ -1229,7 +1671,13 @@ void ppu_exec_loader::load() const
*entry++ = MR(r12, r19);
// Branch to initialization
make_branch(entry, vm::cast(header.e_entry, HERE));
make_branch(entry, static_cast<u32>(header.e_entry), true);
// Register entry function (addr, size)
exec_set.emplace_back(std::make_pair(entry.addr() & -0x1000, entry.addr() & 0xfff));
// Initialize recompiler
ppu_initialize("", exec_set, static_cast<u32>(header.e_entry));
auto ppu = idm::make_ptr<PPUThread>("main_thread");