mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-03 21:41:26 +12:00
Finalize PPU migration to the frame pass system
This commit is contained in:
parent
cba658baba
commit
7979c5d9eb
4 changed files with 74 additions and 56 deletions
|
@ -4,17 +4,17 @@
|
||||||
|
|
||||||
LOG_CHANNEL(jit_log, "JIT");
|
LOG_CHANNEL(jit_log, "JIT");
|
||||||
|
|
||||||
#define STDOUT_DEBUG
|
#define STDOUT_DEBUG 0
|
||||||
|
|
||||||
#ifndef STDOUT_DEBUG
|
#if STDOUT_DEBUG
|
||||||
#define DPRINT jit_log.trace
|
|
||||||
#else
|
|
||||||
#define DPRINT(...)\
|
#define DPRINT(...)\
|
||||||
do {\
|
do {\
|
||||||
printf(__VA_ARGS__);\
|
printf(__VA_ARGS__);\
|
||||||
printf("\n");\
|
printf("\n");\
|
||||||
fflush(stdout);\
|
fflush(stdout);\
|
||||||
} while (0)
|
} while (0)
|
||||||
|
#else
|
||||||
|
#define DPRINT jit_log.trace
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace aarch64
|
namespace aarch64
|
||||||
|
@ -37,15 +37,9 @@ namespace aarch64
|
||||||
using instruction_info_t = GHC_frame_preservation_pass::instruction_info_t;
|
using instruction_info_t = GHC_frame_preservation_pass::instruction_info_t;
|
||||||
using function_info_t = GHC_frame_preservation_pass::function_info_t;
|
using function_info_t = GHC_frame_preservation_pass::function_info_t;
|
||||||
|
|
||||||
GHC_frame_preservation_pass::GHC_frame_preservation_pass(
|
GHC_frame_preservation_pass::GHC_frame_preservation_pass(const config_t& configuration)
|
||||||
u32 hv_ctx_offset,
|
: execution_context(configuration)
|
||||||
const std::vector<std::pair<std::string, gpr>>& base_register_lookup,
|
{}
|
||||||
std::function<bool(const std::string&)> exclusion_callback)
|
|
||||||
{
|
|
||||||
execution_context.base_register_lookup = base_register_lookup;
|
|
||||||
execution_context.hypervisor_context_offset = hv_ctx_offset;
|
|
||||||
this->exclusion_callback = exclusion_callback;
|
|
||||||
}
|
|
||||||
|
|
||||||
void GHC_frame_preservation_pass::reset()
|
void GHC_frame_preservation_pass::reset()
|
||||||
{
|
{
|
||||||
|
@ -94,6 +88,8 @@ namespace aarch64
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (execution_context.use_stack_frames)
|
||||||
|
{
|
||||||
// Stack frame estimation. SPU code can be very long and consumes several KB of stack.
|
// Stack frame estimation. SPU code can be very long and consumes several KB of stack.
|
||||||
u32 stack_frame_size = 128u;
|
u32 stack_frame_size = 128u;
|
||||||
// Actual ratio is usually around 1:4
|
// Actual ratio is usually around 1:4
|
||||||
|
@ -106,11 +102,15 @@ namespace aarch64
|
||||||
stack_frame_size *= std::min(spill_pages, 32u); // 128 to 4k dynamic. It is unlikely that any frame consumes more than 4096 bytes
|
stack_frame_size *= std::min(spill_pages, 32u); // 128 to 4k dynamic. It is unlikely that any frame consumes more than 4096 bytes
|
||||||
|
|
||||||
result.stack_frame_size = stack_frame_size;
|
result.stack_frame_size = stack_frame_size;
|
||||||
|
}
|
||||||
|
|
||||||
result.instruction_count = f.getInstructionCount();
|
result.instruction_count = f.getInstructionCount();
|
||||||
result.num_external_calls = 0;
|
result.num_external_calls = 0;
|
||||||
|
|
||||||
// The LR is not spared by LLVM in cases where there is a lot of spilling.
|
// The LR is not spared by LLVM in cases where there is a lot of spilling.
|
||||||
// This is another thing to be moved to a MachineFunction pass.
|
// This is much easier to manage with a custom LLVM branch as we can just mark X30 as off-limits as a GPR.
|
||||||
|
// This is another thing to be moved to a MachineFunction pass. Ideally we should check the instruction stream for writes to LR and reload it on exit.
|
||||||
|
// For now, assume it is dirtied if the function is of any reasonable length.
|
||||||
result.clobbers_x30 = result.instruction_count > 32;
|
result.clobbers_x30 = result.instruction_count > 32;
|
||||||
|
|
||||||
for (auto& bb : f)
|
for (auto& bb : f)
|
||||||
|
@ -323,13 +323,7 @@ namespace aarch64
|
||||||
llvm::Instruction* original_inst = llvm::dyn_cast<llvm::Instruction>(bit);
|
llvm::Instruction* original_inst = llvm::dyn_cast<llvm::Instruction>(bit);
|
||||||
irb->SetInsertPoint(ensure(llvm::dyn_cast<llvm::Instruction>(bit)));
|
irb->SetInsertPoint(ensure(llvm::dyn_cast<llvm::Instruction>(bit)));
|
||||||
|
|
||||||
if (function_info.stack_frame_size > 0)
|
// We're about to make a tail call. This means after this call, we're supposed to return immediately. In that case, don't link, lower to branch only.
|
||||||
{
|
|
||||||
// 1. Nuke the local stack frame if any
|
|
||||||
LLVM_ASM_VOID(frame_epilogue, irb, f.getContext());
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2. We're about to make a tail call. This means after this call, we're supposed to return immediately. In that case, don't link, lower to branch only.
|
|
||||||
// Note that branches have some undesirable side-effects. For one, we lose the argument inputs, which the callee is expecting.
|
// Note that branches have some undesirable side-effects. For one, we lose the argument inputs, which the callee is expecting.
|
||||||
// This means we burn some cycles on every exit, but in return we do not require one instruction on the prologue + the ret chain is eliminated.
|
// This means we burn some cycles on every exit, but in return we do not require one instruction on the prologue + the ret chain is eliminated.
|
||||||
// No ret-chain also means two BBs can call each other indefinitely without running out of stack without relying on llvm to optimize that away.
|
// No ret-chain also means two BBs can call each other indefinitely without running out of stack without relying on llvm to optimize that away.
|
||||||
|
@ -374,6 +368,18 @@ namespace aarch64
|
||||||
exit_fn += x30_tail_restore;
|
exit_fn += x30_tail_restore;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stack cleanup. We need to do this last to allow the spiller to find its own spilled variables.
|
||||||
|
if (function_info.stack_frame_size > 0)
|
||||||
|
{
|
||||||
|
exit_fn += frame_epilogue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (execution_context.debug_info)
|
||||||
|
{
|
||||||
|
// Store our current address in x27, taking the place of LR (for debugging, since bt is now useless)
|
||||||
|
exit_fn += "adr x27, .;\n";
|
||||||
|
}
|
||||||
|
|
||||||
auto target = ensure(ci->getCalledOperand());
|
auto target = ensure(ci->getCalledOperand());
|
||||||
args.push_back(target);
|
args.push_back(target);
|
||||||
|
|
||||||
|
|
|
@ -43,14 +43,20 @@ namespace aarch64
|
||||||
llvm::Function* callee; // Callee if any
|
llvm::Function* callee; // Callee if any
|
||||||
std::string callee_name; // Name of the callee.
|
std::string callee_name; // Name of the callee.
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct config_t
|
||||||
|
{
|
||||||
|
bool debug_info = false; // Record debug information
|
||||||
|
bool use_stack_frames = true; // Allocate a stack frame for each function. The gateway can alternatively manage a global stack to use as scratch.
|
||||||
|
u32 hypervisor_context_offset = 0; // Offset within the "thread" object where we can find the hypervisor context (registers configured at gateway).
|
||||||
|
std::function<bool(const std::string&)> exclusion_callback; // [Optional] Callback run on each function before transform. Return "true" to exclude from frame processing.
|
||||||
|
std::vector<std::pair<std::string, gpr>> base_register_lookup; // [Optional] Function lookup table to determine the location of the "thread" context.
|
||||||
|
};
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::unordered_set<std::string> visited_functions;
|
std::unordered_set<std::string> visited_functions;
|
||||||
|
|
||||||
struct
|
config_t execution_context;
|
||||||
{
|
|
||||||
std::vector<std::pair<std::string, gpr>> base_register_lookup;
|
|
||||||
u32 hypervisor_context_offset;
|
|
||||||
} execution_context;
|
|
||||||
|
|
||||||
std::function<bool(const std::string&)> exclusion_callback;
|
std::function<bool(const std::string&)> exclusion_callback;
|
||||||
|
|
||||||
|
@ -63,10 +69,7 @@ namespace aarch64
|
||||||
gpr get_base_register_for_call(const std::string& callee_name);
|
gpr get_base_register_for_call(const std::string& callee_name);
|
||||||
public:
|
public:
|
||||||
|
|
||||||
GHC_frame_preservation_pass(
|
GHC_frame_preservation_pass(const config_t& configuration);
|
||||||
u32 hv_ctx_offset,
|
|
||||||
const std::vector<std::pair<std::string, gpr>>& base_register_lookup = {},
|
|
||||||
std::function<bool(const std::string&)> exclusion_callback = {});
|
|
||||||
~GHC_frame_preservation_pass() = default;
|
~GHC_frame_preservation_pass() = default;
|
||||||
|
|
||||||
void run(llvm::IRBuilder<>* irb, llvm::Function& f) override;
|
void run(llvm::IRBuilder<>* irb, llvm::Function& f) override;
|
||||||
|
|
|
@ -3938,7 +3938,7 @@ llvm::InlineAsm* compile_inline_asm(
|
||||||
static inline
|
static inline
|
||||||
llvm::CallInst* llvm_asm(
|
llvm::CallInst* llvm_asm(
|
||||||
llvm::IRBuilder<>* irb,
|
llvm::IRBuilder<>* irb,
|
||||||
std::string& asm_,
|
const std::string& asm_,
|
||||||
llvm::ArrayRef<llvm::Value*> args,
|
llvm::ArrayRef<llvm::Value*> args,
|
||||||
const std::string& constraints,
|
const std::string& constraints,
|
||||||
llvm::LLVMContext& context)
|
llvm::LLVMContext& context)
|
||||||
|
|
|
@ -36,6 +36,7 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo
|
||||||
|
|
||||||
// Initialize transform passes
|
// Initialize transform passes
|
||||||
#ifdef ARCH_ARM64
|
#ifdef ARCH_ARM64
|
||||||
|
{
|
||||||
// Base reg table definition
|
// Base reg table definition
|
||||||
// Assume all functions named __0x... are PPU functions and take the m_exec as the first arg
|
// Assume all functions named __0x... are PPU functions and take the m_exec as the first arg
|
||||||
std::vector<std::pair<std::string, aarch64::gpr>> base_reg_lookup = {
|
std::vector<std::pair<std::string, aarch64::gpr>> base_reg_lookup = {
|
||||||
|
@ -45,13 +46,21 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo
|
||||||
{ "__", aarch64::x19 } // Probably link table entries
|
{ "__", aarch64::x19 } // Probably link table entries
|
||||||
};
|
};
|
||||||
|
|
||||||
|
aarch64::GHC_frame_preservation_pass::config_t config =
|
||||||
|
{
|
||||||
|
.debug_info = false, // Set to "true" to insert debug frames on x27
|
||||||
|
.use_stack_frames = false, // GW allocates 4k of scratch on the stack
|
||||||
|
.hypervisor_context_offset = ::offset32(&ppu_thread::hv_ctx),
|
||||||
|
.exclusion_callback = {}, // Unused, we don't have special exclusion functions on PPU
|
||||||
|
.base_register_lookup = base_reg_lookup
|
||||||
|
};
|
||||||
|
|
||||||
// Create transform pass
|
// Create transform pass
|
||||||
std::unique_ptr<translator_pass> ghc_fixup_pass = std::make_unique<aarch64::GHC_frame_preservation_pass>(
|
std::unique_ptr<translator_pass> ghc_fixup_pass = std::make_unique<aarch64::GHC_frame_preservation_pass>(config);
|
||||||
::offset32(&ppu_thread::hv_ctx),
|
|
||||||
base_reg_lookup);
|
|
||||||
|
|
||||||
// Register it
|
// Register it
|
||||||
register_transform_pass(ghc_fixup_pass);
|
register_transform_pass(ghc_fixup_pass);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Thread context struct (TODO: safer member access)
|
// Thread context struct (TODO: safer member access)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue