Initial commit for advanced tracer

This commit is contained in:
S Gopal Rajagopal 2014-10-25 06:38:47 +05:30
parent ce21a9d250
commit 6bc0ce8046
4 changed files with 1596 additions and 1609 deletions

View file

@ -55,9 +55,13 @@ u64 rotr64(const u64 x, const u8 n) { return (x >> n) | (x << (64 - n)); }
#define rotl64 _rotl64
#define rotr64 _rotr64
namespace ppu_recompiler_llvm {
class Compiler;
}
class PPUInterpreter : public PPUOpcodes
{
friend class PPULLVMRecompiler;
friend class ppu_recompiler_llvm::Compiler;
private:
PPUThread& CPU;

File diff suppressed because it is too large Load diff

View file

@ -11,40 +11,93 @@
#include "llvm/ExecutionEngine/JIT.h"
#include "llvm/PassManager.h"
namespace ppu_recompiler_llvm {
/// Branch type.
/// Enumerator values are spelled out explicitly; they match the implicit
/// numbering (0, 1, 2, 3) of the declaration order.
enum BranchType {
    None         = 0,
    FunctionCall = 1,
    Block        = 2,
    Return       = 3
};
/// Unique id of a block
///
/// `block_id` shares storage with the (`address`, `type`) pair held in the
/// anonymous struct below.
/// NOTE(review): presumably this relies on `u32` + `BranchType` exactly filling
/// the 64 bits of `block_id` — confirm the enum's size on all target compilers.
union BlockId {
/// The id viewed as a single 64-bit value
u64 block_id;
struct {
/// Address of the block
u32 address;
/// The type of the block
BranchType type;
};
};
/// An execution trace: a typed sequence of blocks, tagged with the function
/// in which it was found.
struct ExecutionTrace {
/// The function in which this trace was found
u32 function_address;
/// Execution trace type
enum {
Linear,
Loop,
} type;
/// Sequence of blocks encountered in this trace
std::vector<BlockId> blocks;
};
/// A fragment of PPU code, expressed as a list of
/// (block, list of next blocks) pairs.
using CodeFragment = std::vector<std::pair<BlockId, std::vector<BlockId>>>;
/// Pointer to a function built by compiling a fragment of PPU code
typedef u64(*CompiledCodeFragment)(PPUThread * ppu_state, PPUInterpreter * interpreter);
struct PPUState;
/// PPU recompiler that uses LLVM for code generation and optimization
class PPULLVMRecompiler : public ThreadBase, protected PPUOpcodes, protected PPCDecoder {
/// PPU compiler that uses LLVM for code generation and optimization
class Compiler : protected PPUOpcodes, protected PPCDecoder {
public:
typedef void(*Executable)(PPUThread * ppu_state, PPUInterpreter * interpreter);
struct Stats {
/// Time spent building the LLVM IR
std::chrono::nanoseconds ir_build_time;
PPULLVMRecompiler();
/// Time spent optimizing
std::chrono::nanoseconds optimization_time;
PPULLVMRecompiler(const PPULLVMRecompiler & other) = delete;
PPULLVMRecompiler(PPULLVMRecompiler && other) = delete;
/// Time spent translating LLVM IR to machine code
std::chrono::nanoseconds translation_time;
virtual ~PPULLVMRecompiler();
/// Total time
std::chrono::nanoseconds total_time;
PPULLVMRecompiler & operator = (const PPULLVMRecompiler & other) = delete;
PPULLVMRecompiler & operator = (PPULLVMRecompiler && other) = delete;
/// Contains the number of times interpreter fallback was used
std::map<std::string, u64> interpreter_fallback_stats;
};
/// Get the executable for the code starting at address
std::pair<Executable, u32> GetExecutable(u32 address);
Compiler();
/// Release an executable earlier obtained through GetExecutable
void ReleaseExecutable(u32 address, u32 revision);
Compiler(const Compiler & other) = delete;
Compiler(Compiler && other) = delete;
/// Request the code at the specified address to be compiled
void RequestCompilation(u32 address);
virtual ~Compiler();
/// Get the current revision
u32 GetCurrentRevision();
Compiler & operator = (const Compiler & other) = delete;
Compiler & operator = (Compiler && other) = delete;
/// Compile a code fragment
CompiledCodeFragment Compile(const std::string & name, const CodeFragment & code_fragment);
/// Free a compiled code fragment
void FreeCompiledCodeFragment(CompiledCodeFragment compiled_code_fragment);
/// Retrieve compiler stats
Stats GetStats();
/// Execute all tests
void RunAllTests(PPUThread * ppu_state, PPUInterpreter * interpreter);
void Task() override;
protected:
void Decode(const u32 code) override;
@ -449,45 +502,8 @@ protected:
void UNK(const u32 code, const u32 opcode, const u32 gcode) override;
private:
struct ExecutableInfo {
/// Pointer to the executable
Executable executable;
/// Size of the executable
size_t size;
/// Number of PPU instructions compiled into this executable
u32 num_instructions;
/// List of blocks that this executable refers to that have not been hit yet
std::list<u32> unhit_blocks_list;
/// LLVM function corresponding to the executable
llvm::Function * llvm_function;
};
/// Lock for accessing m_compiled_shared
// TODO: Use a RW lock
std::mutex m_compiled_shared_lock;
/// Sections that have been compiled. This data store is shared with the execution threads.
/// Keys are starting address of the section and ~revision. Data is pointer to the executable and its reference count.
std::map<std::pair<u32, u32>, std::pair<Executable, u32>> m_compiled_shared;
/// Lock for accessing m_uncompiled_shared
std::mutex m_uncompiled_shared_lock;
/// Current revision. This is incremented every time a section is compiled.
std::atomic<u32> m_revision;
/// Sections that have not been compiled yet. This data store is shared with the execution threads.
std::list<u32> m_uncompiled_shared;
/// Set of all blocks that have been hit
std::set<u32> m_hit_blocks;
/// Sections that have been compiled. Keys are starting address of the section and ~revision.
std::map<std::pair<u32, u32>, ExecutableInfo> m_compiled;
/// Map from compiled code fragment to the LLVM function for the code fragment
std::map<CompiledCodeFragment, llvm::Function *> m_compiled;
/// LLVM context
llvm::LLVMContext * m_llvm_context;
@ -512,56 +528,29 @@ private:
/// The function being compiled
llvm::Function * m_current_function;
/// List of blocks to be compiled in the current function being compiled
std::list<u32> m_current_function_uncompiled_blocks_list;
/// List of blocks that the current function refers to but have not been hit yet
std::list<u32> m_current_function_unhit_blocks_list;
/// The list of next blocks for the current block
const std::vector<BlockId> * m_current_block_next_blocks;
/// Address of the current instruction
u32 m_current_instruction_address;
/// Number of instructions in this section
u32 m_num_instructions;
/// Compiler stats
Stats m_stats;
/// Time spent building the LLVM IR
std::chrono::nanoseconds m_ir_build_time;
/// Get the name of the basic block for the specified address
std::string GetBasicBlockNameFromAddress(u32 address);
/// Time spent optimizing
std::chrono::nanoseconds m_optimizing_time;
/// Get the basic block for the specified address.
llvm::BasicBlock * GetBasicBlockFromAddress(u32 address, llvm::Function * function, bool create_if_not_exist = false);
/// Time spent translating LLVM IR to machine code
std::chrono::nanoseconds m_translation_time;
/// Get PPU state pointer argument
llvm::Value * GetPPUStateArg();
/// Time spent compiling
std::chrono::nanoseconds m_compilation_time;
/// Get interpreter pointer argument
llvm::Value * GetInterpreterArg();
/// Time spent idling
std::chrono::nanoseconds m_idling_time;
/// Total time
std::chrono::nanoseconds m_total_time;
/// Contains the number of times the interpreter fallback was used
std::map<std::string, u64> m_interpreter_fallback_stats;
/// Get the block in function for the instruction at the specified address.
llvm::BasicBlock * GetBlockInFunction(u32 address, llvm::Function * function, bool create_if_not_exist = false);
/// Compile the section starting at address
void Compile(u32 address);
/// Remove old versions of executables that are no longer used by any execution thread
void RemoveUnusedOldVersions();
/// Test whether the block needs to be compiled
bool NeedsCompiling(u32 address);
/// Get PPU state pointer
llvm::Value * GetPPUState();
/// Get interpreter pointer
llvm::Value * GetInterpreter();
/// Get tracer pointer argument
llvm::Value * GetTracerArg();
/// Get a bit
llvm::Value * GetBit(llvm::Value * val, u32 n);
@ -687,7 +676,7 @@ private:
llvm::Value * CheckBranchCondition(u32 bo, u32 bi);
/// Create IR for a branch instruction
void CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i64, bool lk);
void CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i64, bool lk, bool target_is_lr = false);
/// Read from memory
llvm::Value * ReadMemory(llvm::Value * addr_i64, u32 bits, u32 alignment = 0, bool bswap = true, bool could_be_mmio = true);
@ -707,6 +696,9 @@ private:
template<class ReturnType, class Func, class... Args>
llvm::Value * Call(const char * name, Func function, Args... args);
/// Tests if the instruction is a branch instruction or not
bool IsBranchInstruction(u32 instruction);
/// Test an instruction against the interpreter
template <class PPULLVMRecompilerFn, class PPUInterpreterFn, class... Args>
void VerifyInstructionAgainstInterpreter(const char * name, PPULLVMRecompilerFn recomp_fn, PPUInterpreterFn interp_fn, PPUState & input_state, Args... args);
@ -724,34 +716,87 @@ private:
static void InitRotateMask();
};
/// PPU emulator that uses LLVM to convert PPU instructions to host CPU instructions
class PPULLVMEmulator : public CPUDecoder {
/// Analyses execution traces and finds hot paths
/// NOTE(review): the class is currently empty — it declares no members.
class Profiler {
};
class RecompilationEngine {
public:
PPULLVMEmulator(PPUThread & ppu);
PPULLVMEmulator() = delete;
virtual ~RecompilationEngine() = default;
PPULLVMEmulator(const PPULLVMEmulator & other) = delete;
PPULLVMEmulator(PPULLVMEmulator && other) = delete;
/// Get the compiled code fragment for the specified address
CompiledCodeFragment GetCompiledCodeFragment(u32 address);
virtual ~PPULLVMEmulator();
/// Release a compiled code fragment earlier obtained through GetCompiledCodeFragment
void ReleaseCompiledCodeFragment(CompiledCodeFragment compiled_code_fragment);
PPULLVMEmulator & operator = (const PPULLVMEmulator & other) = delete;
PPULLVMEmulator & operator = (PPULLVMEmulator && other) = delete;
/// Get the current revision
u32 GetCurrentRevision();
/// Get a pointer to the instance of this class
static std::shared_ptr<RecompilationEngine> GetInstance();
private:
RecompilationEngine() = default;
RecompilationEngine(const RecompilationEngine & other) = delete;
RecompilationEngine(RecompilationEngine && other) = delete;
RecompilationEngine & operator = (const RecompilationEngine & other) = delete;
RecompilationEngine & operator = (RecompilationEngine && other) = delete;
/// Mutex used to prevent multiple creation
static std::mutex s_mutex;
/// The instance
static std::shared_ptr<RecompilationEngine> s_the_instance;
};
/// Finds interesting execution sequences.
///
/// Keeps the current execution trace and a call stack as private state.
class Tracer {
public:
    Tracer();
    virtual ~Tracer();

    // Instances can neither be copied nor moved.
    Tracer(const Tracer &) = delete;
    Tracer(Tracer &&) = delete;
    Tracer & operator = (const Tracer &) = delete;
    Tracer & operator = (Tracer &&) = delete;

    /// Tell the tracer that a branch has been encountered.
    void Trace(BranchType branch_type, u32 address);

    /// Tell the tracer that the execution sequence is being terminated.
    void Terminate();

private:
    /// The execution trace currently being recorded
    std::vector<BlockId> m_trace;

    /// The call stack
    std::vector<u32> m_stack;
};
/// PPU execution engine
class ExecutionEngine : public CPUDecoder {
public:
ExecutionEngine(PPUThread & ppu);
ExecutionEngine() = delete;
ExecutionEngine(const ExecutionEngine & other) = delete;
ExecutionEngine(ExecutionEngine && other) = delete;
virtual ~ExecutionEngine();
ExecutionEngine & operator = (const ExecutionEngine & other) = delete;
ExecutionEngine & operator = (ExecutionEngine && other) = delete;
u8 DecodeMemory(const u32 address) override;
private:
struct ExecutableInfo {
/// Pointer to the executable
PPULLVMRecompiler::Executable executable;
/// The revision of the executable
u32 revision;
/// Number of times the executable was hit
u32 num_hits;
};
/// PPU processor context
PPUThread & m_ppu;
@ -761,29 +806,27 @@ private:
/// PPU instruction Decoder
PPUDecoder m_decoder;
/// Set to true if the last executed instruction was a branch
bool m_last_instr_was_branch;
/// Execution tracer
Tracer m_tracer;
/// The time at which the m_address_to_executable cache was last cleared
/// Branch type of the last executed instruction
BranchType m_last_branch_type;
/// The time at which the m_address_to_compiled_code_fragment cache was last cleared
std::chrono::high_resolution_clock::time_point m_last_cache_clear_time;
/// The revision of the recompiler to which this thread is synced
u32 m_recompiler_revision;
/// Address to executable map. Key is address.
std::unordered_map<u32, ExecutableInfo> m_address_to_executable;
/// Address to compiled code fragment lookup. Key is address. Data is the pair (compiled code fragment, times hit).
std::unordered_map<u32, std::pair<CompiledCodeFragment, u32>> m_address_to_compiled_code_fragment;
/// Sections that have not been compiled yet. Key is starting address of the section.
std::unordered_map<u32, u64> m_uncompiled;
/// Number of instances of this class
static u32 s_num_instances;
/// Mutex used to prevent multiple instances of the recompiler from being created
static std::mutex s_recompiler_mutex;
/// PPU to LLVM recompiler
static PPULLVMRecompiler * s_recompiler;
/// Recompilation engine
std::shared_ptr<RecompilationEngine> m_recompilation_engine;
};
/// Get the branch type from a branch instruction
/// @param instruction The instruction to classify (NOTE(review): presumably the raw 32-bit PPU instruction word — confirm)
/// @return The branch type of the instruction
BranchType GetBranchTypeFromInstruction(u32 instruction);
}
#endif // PPU_LLVM_RECOMPILER_H

View file

@ -630,7 +630,7 @@
<Filter>Emu\Cell</Filter>
</ClCompile>
<ClCompile Include="Emu\Cell\PPULLVMRecompilerTests.cpp">
<Filter>Source Files</Filter>
<Filter>Emu\Cell</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>