Add all the files

This commit is contained in:
Exzap 2022-08-22 22:21:23 +02:00
parent e3db07a16a
commit d60742f52b
1445 changed files with 430238 additions and 0 deletions

View file

@ -0,0 +1,11 @@
#pragma once
// Compile-time constants describing the emulated Espresso CPU.
// NOTE(review): the clock values are presumably in Hz - confirm against users of these constants.
namespace Espresso
{
	// number of CPU cores
	inline constexpr int CORE_COUNT = 3;

	// bus clock; the timer clock below is derived from it
	inline constexpr uint64 BUS_CLOCK = 248625000;
	// core clock
	inline constexpr uint64 CORE_CLOCK = 1243125000;
	// timer clock, one quarter of the bus clock
	inline constexpr uint64 TIMER_CLOCK = BUS_CLOCK / 4;
}

View file

@ -0,0 +1,5 @@
#include "Common/precompiled.h"
#include "DebugSymbolStorage.h"
// Out-of-class definitions of the static members declared in class DebugSymbolStorage.
// s_lock is acquired by the class' static methods around every access to s_typeStorage.
FSpinlock DebugSymbolStorage::s_lock;
// Maps an address (MPTR) to the symbol type recorded for it.
std::unordered_map<MPTR, DEBUG_SYMBOL_TYPE> DebugSymbolStorage::s_typeStorage;

View file

@ -0,0 +1,63 @@
#pragma once
#include "util/helpers/fspinlock.h"
// Kind of symbol that DebugSymbolStorage can associate with an address.
// UNDEFINED is what DebugSymbolStorage::GetDataType returns for addresses without an entry.
enum class DEBUG_SYMBOL_TYPE
{
UNDEFINED,
CODE,
// big-endian types
U64,
U32,
U16,
U8,
S64,
S32,
S16,
S8,
FLOAT,
DOUBLE,
};
// Global registry that associates addresses (MPTR) with a DEBUG_SYMBOL_TYPE.
// All methods are static and take s_lock around every access to the underlying map.
class DebugSymbolStorage
{
public:
	// Records `type` for `address`, replacing any previous entry.
	static void StoreDataType(MPTR address, DEBUG_SYMBOL_TYPE type)
	{
		s_lock.acquire();
		s_typeStorage[address] = type;
		s_lock.release();
	}

	// Returns the type recorded for `address`, or DEBUG_SYMBOL_TYPE::UNDEFINED if there is no entry.
	static DEBUG_SYMBOL_TYPE GetDataType(MPTR address)
	{
		DEBUG_SYMBOL_TYPE type = DEBUG_SYMBOL_TYPE::UNDEFINED;
		s_lock.acquire();
		const auto itr = s_typeStorage.find(address);
		if (itr != s_typeStorage.end())
			type = itr->second;
		s_lock.release();
		return type;
	}

	// Removes the entries at address, address+4, address+8, ... covering `length` bytes.
	// A length that is not a multiple of 4 is rounded up to the next step. The previous
	// implementation subtracted 4 from the unsigned length unconditionally, which wrapped
	// around for such lengths and kept the loop (and the lock) busy for billions of iterations.
	static void ClearRange(MPTR address, uint32 length)
	{
		if (length == 0)
			return;
		s_lock.acquire();
		for (;;)
		{
			auto itr = s_typeStorage.find(address);
			if (itr != s_typeStorage.end())
				s_typeStorage.erase(itr);
			if (length <= 4)
				break; // last (possibly partial) step handled
			address += 4;
			length -= 4;
		}
		s_lock.release();
	}

private:
	static FSpinlock s_lock;
	static std::unordered_map<MPTR, DEBUG_SYMBOL_TYPE> s_typeStorage;
};

View file

@ -0,0 +1,573 @@
#include "gui/guiWrapper.h"
#include "Debugger.h"
#include "Cemu/PPCAssembler/ppcAssembler.h"
#include "Cafe/HW/Espresso/Recompiler/PPCRecompiler.h"
#include "Cemu/ExpressionParser/ExpressionParser.h"
#include "gui/debugger/DebuggerWindow2.h"
#include "Cafe/OS/libs/coreinit/coreinit.h"
#if BOOST_OS_WINDOWS > 0
#include <Windows.h>
#endif
// Global debugger state shared by all debugger_* functions in this file.
// The empty braces value-initialize it: flags start false, pointers null, lists empty.
debuggerState_t debuggerState{ };
// Returns the head of the breakpoint chain registered for `address`,
// or nullptr when no breakpoint exists at that address.
DebuggerBreakpoint* debugger_getFirstBP(uint32 address)
{
	auto& chainHeads = debuggerState.breakpoints;
	for (size_t idx = 0; idx < chainHeads.size(); idx++)
	{
		DebuggerBreakpoint* head = chainHeads[idx];
		if (head->address == address)
			return head;
	}
	return nullptr;
}
// Returns the first breakpoint of type `bpType` in the chain registered for `address`,
// or nullptr when the address has no chain or the chain holds no breakpoint of that type.
DebuggerBreakpoint* debugger_getFirstBP(uint32 address, uint8 bpType)
{
	for (DebuggerBreakpoint* head : debuggerState.breakpoints)
	{
		if (head->address != address)
			continue;
		// only the first chain found for this address is searched
		for (DebuggerBreakpoint* node = head; node != nullptr; node = node->next)
		{
			if (node->bpType == bpType)
				return node;
		}
		return nullptr;
	}
	return nullptr;
}
// Returns true if the chain starting at `bp` contains a breakpoint of type `bpType`.
// A null `bp` is treated as an empty chain.
bool debuggerBPChain_hasType(DebuggerBreakpoint* bp, uint8 bpType)
{
	for (DebuggerBreakpoint* node = bp; node != nullptr; node = node->next)
	{
		if (node->bpType == bpType)
			return true;
	}
	return false;
}
// Registers `bp` for `address`: appends it to the end of the existing chain for that
// address, or starts a new chain in debuggerState.breakpoints if there is none yet.
void debuggerBPChain_add(uint32 address, DebuggerBreakpoint* bp)
{
	bp->next = nullptr;
	DebuggerBreakpoint* tail = debugger_getFirstBP(address);
	if (tail == nullptr)
	{
		// no breakpoint chain exists for this address, bp becomes the head
		debuggerState.breakpoints.push_back(bp);
		return;
	}
	// walk to the last element and link the new breakpoint behind it
	while (tail->next != nullptr)
		tail = tail->next;
	tail->next = bp;
}
// Returns the opcode that belongs at `address` when no breakpoint is applied:
// the opcode saved by the first normal/one-shot breakpoint in the chain if there is one,
// otherwise the value currently stored in memory.
uint32 debugger_getAddressOriginalOpcode(uint32 address)
{
	for (auto node = debugger_getFirstBP(address); node; node = node->next)
	{
		const bool savesOpcode = node->bpType == DEBUGGER_BP_T_NORMAL || node->bpType == DEBUGGER_BP_T_ONE_SHOT;
		if (savesOpcode)
			return node->originalOpcodeValue;
	}
	return memory_readU32(address);
}
// Writes `newValue` to `address` and, if that changed the stored value,
// invalidates the recompiler range covering the written word.
void debugger_updateMemoryU32(uint32 address, uint32 newValue)
{
	const bool valueDiffers = (newValue != memory_readU32(address));
	memory_writeU32(address, newValue);
	if (!valueDiffers)
		return;
	PPCRecompiler_invalidateRange(address, address + 4);
}
// Synchronizes the instruction word at `address` with its execute breakpoints.
// If an enabled execute breakpoint exists and `forceRestore` is false, the TW trap
// instruction is written. Otherwise, if any execute breakpoint exists, the saved
// original opcode is written back. Addresses without execute breakpoints are left alone.
void debugger_updateExecutionBreakpoint(uint32 address, bool forceRestore)
{
	bool foundExecuteBP = false;
	uint32 opcodeToRestore = 0;
	for (auto node = debugger_getFirstBP(address); node != nullptr; node = node->next)
	{
		if (!node->isExecuteBP())
			continue;
		if (node->enabled && !forceRestore)
		{
			// write TW instruction to memory
			debugger_updateMemoryU32(address, (31 << 26) | (4 << 1));
			return;
		}
		// remember the opcode of the last execute breakpoint seen in the chain
		opcodeToRestore = node->originalOpcodeValue;
		foundExecuteBP = true;
	}
	if (!foundExecuteBP)
		return;
	// restore instruction
	debugger_updateMemoryU32(address, opcodeToRestore);
}
// Creates an enabled normal execute breakpoint at `address` and applies it to memory.
// Does nothing if a normal breakpoint is already registered there.
void debugger_createExecuteBreakpoint(uint32 address)
{
	DebuggerBreakpoint* chainHead = debugger_getFirstBP(address);
	const bool alreadyPresent = chainHead != nullptr && debuggerBPChain_hasType(chainHead, DEBUGGER_BP_T_NORMAL);
	if (alreadyPresent)
		return;
	// remember the opcode that the trap instruction is going to replace
	const uint32 opcodeBeforeTrap = debugger_getAddressOriginalOpcode(address);
	DebuggerBreakpoint* newBP = new DebuggerBreakpoint(address, opcodeBeforeTrap, DEBUGGER_BP_T_NORMAL, true);
	debuggerBPChain_add(address, newBP);
	debugger_updateExecutionBreakpoint(address);
}
// Creates an enabled one-shot execute breakpoint at `address` and applies it to memory.
// Does nothing if a one-shot breakpoint is already registered there.
void debugger_createSingleShotExecuteBreakpoint(uint32 address)
{
	DebuggerBreakpoint* chainHead = debugger_getFirstBP(address);
	const bool alreadyPresent = chainHead != nullptr && debuggerBPChain_hasType(chainHead, DEBUGGER_BP_T_ONE_SHOT);
	if (alreadyPresent)
		return;
	// remember the opcode that the trap instruction is going to replace
	const uint32 opcodeBeforeTrap = debugger_getAddressOriginalOpcode(address);
	DebuggerBreakpoint* newBP = new DebuggerBreakpoint(address, opcodeBeforeTrap, DEBUGGER_BP_T_ONE_SHOT, true);
	debuggerBPChain_add(address, newBP);
	debugger_updateExecutionBreakpoint(address);
}
// Forward declaration; the function is defined elsewhere in the coreinit module.
// Used by debugger_updateMemoryBreakpoint to obtain the native handles of the scheduler threads.
namespace coreinit
{
std::vector<std::thread::native_handle_type>& OSGetSchedulerThreads();
}
// Makes `bp` the active memory breakpoint and programs the x86 debug registers of every
// scheduler thread accordingly. Passing nullptr clears the active breakpoint and zeroes
// Dr0, Dr1 and Dr7 on those threads.
// Only implemented for Windows; on other platforms this only writes a log message and
// debuggerState.activeMemoryBreakpoint is left untouched.
void debugger_updateMemoryBreakpoint(DebuggerBreakpoint* bp)
{
// note: this takes a copy of the handle list returned by reference
std::vector<std::thread::native_handle_type> schedulerThreadHandles = coreinit::OSGetSchedulerThreads();
#if BOOST_OS_WINDOWS > 0
debuggerState.activeMemoryBreakpoint = bp;
for (auto& hThreadNH : schedulerThreadHandles)
{
HANDLE hThread = (HANDLE)hThreadNH;
CONTEXT ctx{};
ctx.ContextFlags = CONTEXT_DEBUG_REGISTERS;
// the thread is suspended while its context is read, modified and written back
SuspendThread(hThread);
GetThreadContext(hThread, &ctx);
if (debuggerState.activeMemoryBreakpoint)
{
// both debug registers watch the same host address; they differ only in the access type set in Dr7
ctx.Dr0 = (DWORD64)memory_getPointerFromVirtualOffset(bp->address);
ctx.Dr1 = (DWORD64)memory_getPointerFromVirtualOffset(bp->address);
ctx.Dr7 = 1 | (1 << 16) | (3 << 18); // enable dr0, track write, 4 byte length
ctx.Dr7 |= (4 | (3 << 20) | (3 << 22)); // enable dr1, track read+write, 4 byte length
}
else
{
ctx.Dr0 = (DWORD64)0;
ctx.Dr1 = (DWORD64)0;
ctx.Dr7 = 0; // disable dr0 and dr1
}
SetThreadContext(hThread, &ctx);
ResumeThread(hThread);
}
#else
cemuLog_log(LogType::Force, "Debugger breakpoints are not supported");
#endif
}
// Handles a hardware breakpoint hit. `drMask` has bit 0 set if DR0 (write watch) triggered
// and bit 1 set if DR1 (read+write watch) triggered. Both bits set is treated as a write
// access, anything else as a read access. If the access kind matches the type of the
// active memory breakpoint, a one-shot execute breakpoint is placed on the instruction
// after the current one.
void debugger_handleSingleStepException(uint32 drMask)
{
	const bool hitDR0 = (drMask & (1 << 0)) != 0; // write
	const bool hitDR1 = (drMask & (1 << 1)) != 0; // read
	const uint8 accessType = (hitDR0 && hitDR1) ? DEBUGGER_BP_T_MEMORY_WRITE : DEBUGGER_BP_T_MEMORY_READ;
	DebuggerBreakpoint* activeBP = debuggerState.activeMemoryBreakpoint;
	if (activeBP == nullptr || activeBP->bpType != accessType)
		return;
	debugger_createSingleShotExecuteBreakpoint(ppcInterpreterCurrentInstance->instructionPointer + 4);
}
// Creates a memory breakpoint at `address` and makes it the active one.
// onRead selects a read breakpoint; otherwise a write breakpoint is created.
// Any previously active memory breakpoint is disabled first.
// A combined read+write breakpoint is not supported: the request trips assert_dbg() and
// is then ignored. Previously execution continued past the assert and created a
// breakpoint from an uninitialized type value.
void debugger_createMemoryBreakpoint(uint32 address, bool onRead, bool onWrite)
{
	if (onRead && onWrite)
	{
		assert_dbg();
		return;
	}
	const uint8 bpType = onRead ? DEBUGGER_BP_T_MEMORY_READ : DEBUGGER_BP_T_MEMORY_WRITE;
	// init breakpoint object (memory breakpoints do not replace an opcode, hence the dummy value)
	DebuggerBreakpoint* bp = new DebuggerBreakpoint(address, 0xFFFFFFFF, bpType, true);
	debuggerBPChain_add(address, bp);
	// disable any already existing memory breakpoint
	if (debuggerState.activeMemoryBreakpoint)
	{
		debuggerState.activeMemoryBreakpoint->enabled = false;
		debuggerState.activeMemoryBreakpoint = nullptr;
	}
	// activate new breakpoint
	debugger_updateMemoryBreakpoint(bp);
}
// Places an execute breakpoint at `address` if the break-on-entry option is set.
void debugger_handleEntryBreakpoint(uint32 address)
{
	if (debuggerState.breakOnEntry)
		debugger_createExecuteBreakpoint(address);
}
// Unlinks `bp` from the breakpoint chain of its address and deletes it.
// For execute breakpoints the original instruction is restored first (unless another
// enabled execute breakpoint remains at the address).
// If `bp` is not registered in the first chain found for its address, nothing is deleted.
//
// Fixes over the previous version:
// - the sanity assert after the chain walk was inverted and always failed in debug builds
// - a breakpoint missing from the chain caused a null pointer dereference
// - deleting the active memory breakpoint left debuggerState.activeMemoryBreakpoint dangling
void debugger_deleteBreakpoint(DebuggerBreakpoint* bp)
{
	auto& chainHeads = debuggerState.breakpoints;
	for (size_t i = 0; i < chainHeads.size(); i++)
	{
		DebuggerBreakpoint* head = chainHeads[i];
		if (head->address != bp->address)
			continue;
		// find the predecessor of bp within the chain (nullptr if bp is the head)
		DebuggerBreakpoint* prev = nullptr;
		if (head != bp)
		{
			prev = head;
			while (prev->next != nullptr && prev->next != bp)
				prev = prev->next;
			if (prev->next != bp)
			{
				// bp is not part of this chain, refuse to delete an unregistered breakpoint
				cemu_assert_debug(false);
				return;
			}
		}
		// for execution breakpoints make sure the instruction is restored
		// (must happen while bp is still linked, the saved opcode is read from the chain)
		if (bp->isExecuteBP())
		{
			bp->enabled = false;
			debugger_updateExecutionBreakpoint(bp->address);
		}
		// never leave a pointer to the deleted object behind
		// NOTE(review): hardware debug registers stay programmed until the next call to
		// debugger_updateMemoryBreakpoint; with a null active breakpoint the exception handler ignores hits
		if (debuggerState.activeMemoryBreakpoint == bp)
			debuggerState.activeMemoryBreakpoint = nullptr;
		if (prev == nullptr)
		{
			// remove first in list, the successor (if any) becomes the new chain head
			DebuggerBreakpoint* nextBP = bp->next;
			chainHeads.erase(std::remove(chainHeads.begin(), chainHeads.end(), bp), chainHeads.end());
			if (nextBP)
				chainHeads.push_back(nextBP);
		}
		else
		{
			// remove from the middle or end of the list
			prev->next = bp->next;
		}
		delete bp;
		return;
	}
}
// Toggles the normal execute breakpoint at `address`:
// deletes it if one exists, otherwise creates a new one.
void debugger_toggleExecuteBreakpoint(uint32 address)
{
	DebuggerBreakpoint* normalBP = debugger_getFirstBP(address, DEBUGGER_BP_T_NORMAL);
	if (normalBP == nullptr)
	{
		debugger_createExecuteBreakpoint(address);
		return;
	}
	debugger_deleteBreakpoint(normalBP);
}
// Sets the shouldBreak flag of the debug session; the flag is consumed by debugger_shouldBreak.
void debugger_forceBreak()
{
	auto& session = debuggerState.debugSession;
	session.shouldBreak = true;
}
// Returns the isTrapped flag, which debugger_enterTW sets while a thread is held inside it.
bool debugger_isTrapped()
{
	const bool trapped = debuggerState.debugSession.isTrapped;
	return trapped;
}
// Requests that the trapped thread continues running.
// Only the run flag is set here; the loop in debugger_enterTW reacts to it by stepping past
// the current instruction (which may carry a breakpoint) before leaving the trap.
void debugger_resume()
{
	auto& session = debuggerState.debugSession;
	session.run = true;
}
// Enables or disables (`state`) the breakpoint `bp`, which must be part of the chain
// registered for `address`; otherwise nothing happens.
// - normal execute breakpoints: the flag is updated and the instruction word re-synced
// - memory breakpoints: every other memory breakpoint is disabled (only one can be active),
//   then the hardware breakpoint is armed for `bp` or disarmed
// Other breakpoint types are ignored. The debugger window is refreshed after a change.
void debugger_toggleBreakpoint(uint32 address, bool state, DebuggerBreakpoint* bp)
{
	// confirm that bp is a member of the chain at this address
	DebuggerBreakpoint* target = debugger_getFirstBP(address);
	while (target != nullptr && target != bp)
		target = target->next;
	if (target == nullptr)
		return;

	if (target->bpType == DEBUGGER_BP_T_NORMAL)
	{
		target->enabled = state;
		debugger_updateExecutionBreakpoint(address);
		debuggerWindow_updateViewThreadsafe2();
		return;
	}
	if (!target->isMemBP())
		return;

	// disable other memory breakpoints
	for (DebuggerBreakpoint* head : debuggerState.breakpoints)
	{
		for (DebuggerBreakpoint* other = head; other != nullptr; other = other->next)
		{
			if (other != target && other->isMemBP())
				other->enabled = false;
		}
	}
	target->enabled = state;
	debugger_updateMemoryBreakpoint(state ? target : nullptr);
	debuggerWindow_updateViewThreadsafe2();
}
// Applies `patchData` to guest memory at `address` and records it as a DebuggerPatch
// (patched bytes plus the original bytes they replaced).
// - address and patch size are expected to be multiples of 4 (asserted in debug builds)
// - patches whose ranges overlap or touch the new one are merged into a single entry
// - words covered by an execute breakpoint are not written directly if the breakpoint is
//   enabled; instead the breakpoint's saved opcode is replaced so the patch becomes
//   visible once the breakpoint is lifted
// An empty patch is ignored.
//
// Fixes over the previous version:
// - data/origData were sized to 4 bytes regardless of the patch size (buffer overflow)
// - breakpoint lookups and recompiler invalidation always used the first word's address
// - merging used the original address as offset even after an earlier merge had moved the
//   patch start, and let the bytes of the older patch overwrite the new patch data
void debugger_createPatch(uint32 address, std::span<uint8> patchData)
{
	if ((address & 3) != 0)
		cemu_assert_debug(false);
	if ((patchData.size() & 3) != 0)
		cemu_assert_debug(false);
	if (patchData.empty())
		return;
	const uint32 patchLength = (uint32)patchData.size();
	const uint32 wordCount = patchLength / 4;

	DebuggerPatch* patch = new DebuggerPatch();
	patch->address = address;
	patch->length = patchLength;
	patch->data.assign(patchData.begin(), patchData.end());
	patch->origData.resize(patchLength);
	memcpy(patch->origData.data(), memory_getPointerFromVirtualOffset(address), patchLength);
	// get original data from breakpoints
	// (memory may currently hold the trap instruction instead of the real opcode)
	for (uint32 i = 0; i < wordCount; i++)
	{
		for (DebuggerBreakpoint* bpItr = debugger_getFirstBP(address + i * 4); bpItr; bpItr = bpItr->next)
		{
			if (!bpItr->isExecuteBP())
				continue;
			const uint32 originalOpcodeBE = _swapEndianU32(bpItr->originalOpcodeValue);
			memcpy(patch->origData.data() + i * 4, &originalOpcodeBE, sizeof(originalOpcodeBE));
		}
	}
	// merge with existing patches if the ranges touch
	for (sint32 i = 0; i < (sint32)debuggerState.patches.size(); i++)
	{
		DebuggerPatch* existing = debuggerState.patches[i];
		const uint32 patchEnd = patch->address + patch->length;
		const uint32 existingEnd = existing->address + existing->length;
		if (patchEnd < existing->address || patch->address > existingEnd)
			continue;
		const uint32 newAddress = std::min(patch->address, existing->address);
		const uint32 newEndAddress = std::max(patchEnd, existingEnd);
		const uint32 newLength = newEndAddress - newAddress;
		DebuggerPatch* merged = new DebuggerPatch();
		merged->address = newAddress;
		merged->length = newLength;
		merged->data.resize(newLength);
		merged->origData.resize(newLength);
		// patched bytes: where the ranges overlap the newer patch wins, so it is copied last
		memcpy(merged->data.data() + (existing->address - newAddress), existing->data.data(), existing->length);
		memcpy(merged->data.data() + (patch->address - newAddress), patch->data.data(), patch->length);
		// original bytes: the older patch captured the true original, so it is copied last
		memcpy(merged->origData.data() + (patch->address - newAddress), patch->origData.data(), patch->length);
		memcpy(merged->origData.data() + (existing->address - newAddress), existing->origData.data(), existing->length);
		delete patch;
		patch = merged;
		delete existing;
		// remove currently iterated patch
		debuggerState.patches.erase(debuggerState.patches.begin() + i);
		i--;
	}
	debuggerState.patches.push_back(patch);
	// apply patch (if breakpoints exist then update those instead of actual data)
	for (uint32 i = 0; i < wordCount; i++)
	{
		const uint32 wordAddress = address + i * 4;
		bool hasActiveExecuteBP = false;
		for (DebuggerBreakpoint* bpItr = debugger_getFirstBP(wordAddress); bpItr; bpItr = bpItr->next)
		{
			if (!bpItr->isExecuteBP())
				continue;
			bpItr->originalOpcodeValue = *(uint32be*)(patchData.data() + i * 4);
			if (bpItr->enabled)
				hasActiveExecuteBP = true;
		}
		if (hasActiveExecuteBP == false)
		{
			memcpy(memory_getPointerFromVirtualOffset(wordAddress), patchData.data() + i * 4, 4);
			PPCRecompiler_invalidateRange(wordAddress, wordAddress + 4);
		}
	}
}
// Returns true if the 4-byte word starting at `address` overlaps any recorded patch.
bool debugger_hasPatch(uint32 address)
{
	const auto& patchList = debuggerState.patches;
	for (size_t idx = 0; idx < patchList.size(); idx++)
	{
		const DebuggerPatch* entry = patchList[idx];
		if (address + 4 > entry->address && address < entry->address + entry->length)
			return true;
	}
	return false;
}
// Executes exactly one instruction of `hCPU` with the interpreter.
// The breakpoint at the current debug-session instruction pointer is lifted for the
// duration of the step and re-applied afterwards; the recompiler is disabled meanwhile.
// If `updateDebuggerWindow` is set, the debugger window is told about the new IP.
void debugger_stepInto(PPCInterpreter_t* hCPU, bool updateDebuggerWindow = true)
{
	const bool recompilerWasEnabled = ppcRecompilerEnabled;
	ppcRecompilerEnabled = false;

	const uint32 steppedAddress = debuggerState.debugSession.instructionPointer;
	// restore the real opcode, run it, then put the breakpoint back
	debugger_updateExecutionBreakpoint(steppedAddress, true);
	PPCInterpreterSlim_executeInstruction(hCPU);
	debugger_updateExecutionBreakpoint(steppedAddress);

	debuggerState.debugSession.instructionPointer = hCPU->instructionPointer;
	if (updateDebuggerWindow)
		debuggerWindow_moveIP();

	ppcRecompilerEnabled = recompilerWasEnabled;
}
// Steps over the instruction at the current debug-session instruction pointer.
// For BL/BCTRL a one-shot breakpoint is placed on the following instruction, the call
// instruction itself is stepped, and true is returned so the caller resumes execution
// until the one-shot breakpoint triggers.
// For any other instruction this is a plain single step and false is returned.
bool debugger_stepOver(PPCInterpreter_t* hCPU)
{
	const bool recompilerWasEnabled = ppcRecompilerEnabled;
	ppcRecompilerEnabled = false;
	const uint32 currentIP = debuggerState.debugSession.instructionPointer;
	// lift the breakpoint so the real opcode can be read and disassembled
	debugger_updateExecutionBreakpoint(currentIP, true);
	PPCDisassembledInstruction disasmInstr = { 0 };
	ppcAssembler_disassemble(currentIP, memory_readU32(currentIP), &disasmInstr);
	const bool isCall = disasmInstr.ppcAsmCode == PPCASM_OP_BL || disasmInstr.ppcAsmCode == PPCASM_OP_BCTRL;
	if (isCall)
	{
		// create one-shot breakpoint at next instruction
		debugger_createSingleShotExecuteBreakpoint(currentIP + 4);
		// step over current instruction (to avoid breakpoint)
		debugger_stepInto(hCPU);
		debuggerWindow_moveIP();
		// restore breakpoints
		debugger_updateExecutionBreakpoint(currentIP);
		ppcRecompilerEnabled = recompilerWasEnabled;
		return true;
	}
	// nothing to skip, use step-into
	debugger_stepInto(hCPU);
	debugger_updateExecutionBreakpoint(currentIP);
	debuggerWindow_moveIP();
	ppcRecompilerEnabled = recompilerWasEnabled;
	return false;
}
// Copies the GPRs, FPRs, the link register and the 32 CR entries of `hCPU`
// into the debug session's snapshot.
void debugger_createPPCStateSnapshot(PPCInterpreter_t* hCPU)
{
	auto& snapshot = debuggerState.debugSession.ppcSnapshot;
	memcpy(snapshot.gpr, hCPU->gpr, sizeof(uint32) * 32);
	memcpy(snapshot.fpr, hCPU->fpr, sizeof(FPR_t) * 32);
	snapshot.spr_lr = hCPU->spr.LR;
	for (uint32 crIndex = 0; crIndex < 32; crIndex++)
		snapshot.cr[crIndex] = hCPU->cr[crIndex];
}
// Entry point for a thread that hit a debugger trap.
// Marks the session as trapped, records the thread, instruction pointer and a register
// snapshot, removes a one-shot breakpoint at the current address and notifies the
// debugger window. The calling thread then polls the step control flags
// (stepOver / stepInto / run, set from elsewhere) in 1ms intervals and only returns once
// execution should continue.
void debugger_enterTW(PPCInterpreter_t* hCPU)
{
debuggerState.debugSession.isTrapped = true;
debuggerState.debugSession.debuggedThreadMPTR = coreinitThread_getCurrentThreadMPTRDepr(hCPU);
debuggerState.debugSession.instructionPointer = hCPU->instructionPointer;
debuggerState.debugSession.hCPU = hCPU;
debugger_createPPCStateSnapshot(hCPU);
// remove one-shot breakpoint if it exists
DebuggerBreakpoint* singleshotBP = debugger_getFirstBP(debuggerState.debugSession.instructionPointer, DEBUGGER_BP_T_ONE_SHOT);
if (singleshotBP)
debugger_deleteBreakpoint(singleshotBP);
debuggerWindow_notifyDebugBreakpointHit2();
debuggerWindow_updateViewThreadsafe2();
// reset step control
debuggerState.debugSession.stepInto = false;
debuggerState.debugSession.stepOver = false;
debuggerState.debugSession.run = false;
while (true)
{
std::this_thread::sleep_for(std::chrono::milliseconds(1));
// check for step commands
if (debuggerState.debugSession.stepOver)
{
if (debugger_stepOver(hCPU))
{
debugger_createPPCStateSnapshot(hCPU);
break; // if true is returned, continue with execution
}
// plain single step was performed, stay trapped
debugger_createPPCStateSnapshot(hCPU);
debuggerWindow_updateViewThreadsafe2();
debuggerState.debugSession.stepOver = false;
}
if (debuggerState.debugSession.stepInto)
{
debugger_stepInto(hCPU);
debugger_createPPCStateSnapshot(hCPU);
debuggerWindow_updateViewThreadsafe2();
debuggerState.debugSession.stepInto = false;
continue;
}
if (debuggerState.debugSession.run)
{
debugger_createPPCStateSnapshot(hCPU);
// step past the current instruction without updating the debugger window
debugger_stepInto(hCPU, false);
// NOTE(review): a second instruction is executed here before leaving the trap - confirm this is intended
PPCInterpreterSlim_executeInstruction(hCPU);
debuggerState.debugSession.instructionPointer = hCPU->instructionPointer;
debuggerState.debugSession.run = false;
break;
}
}
// leave the trapped state and notify the debugger window
debuggerState.debugSession.isTrapped = false;
debuggerState.debugSession.hCPU = nullptr;
debuggerWindow_updateViewThreadsafe2();
debuggerWindow_notifyRun();
}
// Consumes a pending break request (see debugger_forceBreak) once the instruction
// pointer of `hCPU` is outside of the emulator trampoline area.
// The actual breakpoint creation is currently disabled; assert_dbg() is hit instead.
void debugger_shouldBreak(PPCInterpreter_t* hCPU)
{
	if (!debuggerState.debugSession.shouldBreak)
		return;
	// exclude emulator trampoline area
	// NOTE(review): the upper bound is inclusive (>), so the address directly behind the area also counts as inside - confirm
	const bool outsideTrampolineArea = hCPU->instructionPointer < MEMORY_CODE_TRAMPOLINE_AREA_ADDR || hCPU->instructionPointer > MEMORY_CODE_TRAMPOLINE_AREA_ADDR + MEMORY_CODE_TRAMPOLINE_AREA_SIZE;
	if (!outsideTrampolineArea)
		return;
	debuggerState.debugSession.shouldBreak = false;
	const uint32 address = (uint32)hCPU->instructionPointer;
	assert_dbg();
	//debugger_createBreakpoint(address, DEBUGGER_BP_TYPE_ONE_SHOT);
}
void debugger_addParserSymbols(class ExpressionParser& ep)
{
for (sint32 i = 0; i < 32; i++)
ep.AddConstant(fmt::format("r{}", i), debuggerState.debugSession.ppcSnapshot.gpr[i]);
}

View file

@ -0,0 +1,125 @@
#pragma once
#include <set>
#include "Cafe/HW/Espresso/PPCState.h"
// older bitmask-style type constants, kept for reference only (disabled)
//#define DEBUGGER_BP_TYPE_NORMAL (1<<0) // normal breakpoint
//#define DEBUGGER_BP_TYPE_ONE_SHOT (1<<1) // one-shot breakpoint
//#define DEBUGGER_BP_TYPE_MEMORY_READ (1<<2) // memory breakpoint
//#define DEBUGGER_BP_TYPE_MEMORY_WRITE (1<<3) // memory breakpoint
// values for DebuggerBreakpoint::bpType
#define DEBUGGER_BP_T_NORMAL 0 // normal breakpoint
#define DEBUGGER_BP_T_ONE_SHOT 1 // normal breakpoint, deletes itself after trigger (used for stepping)
#define DEBUGGER_BP_T_MEMORY_READ 2 // memory breakpoint
#define DEBUGGER_BP_T_MEMORY_WRITE 3 // memory breakpoint
// values for DebuggerBreakpoint::dbType (a separate value space, the numbers overlap with the bpType values above)
#define DEBUGGER_BP_T_GDBSTUB 1 // breakpoint created by GDBStub
#define DEBUGGER_BP_T_DEBUGGER 2 // breakpoint created by Cemu's debugger
// A single breakpoint. Breakpoints that share an address form a singly linked list via
// `next`; only the head of each list is stored in debuggerState.breakpoints.
struct DebuggerBreakpoint
{
	uint32 address; // address the breakpoint is attached to
	uint32 originalOpcodeValue; // opcode to restore when an execute breakpoint is lifted
	mutable uint8 bpType; // one of DEBUGGER_BP_T_NORMAL / ONE_SHOT / MEMORY_READ / MEMORY_WRITE
	mutable bool enabled;
	mutable std::wstring comment;
	mutable uint8 dbType = DEBUGGER_BP_T_DEBUGGER; // creator: DEBUGGER_BP_T_GDBSTUB or DEBUGGER_BP_T_DEBUGGER

	DebuggerBreakpoint(uint32 address, uint32 originalOpcode, uint8 bpType = 0, bool enabled = true, std::wstring comment = std::wstring())
		: address(address), originalOpcodeValue(originalOpcode), bpType(bpType), enabled(enabled), comment(std::move(comment)), next(nullptr)
	{
	}

	// ordering and equality only take the address into account
	bool operator<(const DebuggerBreakpoint& rhs) const
	{
		return rhs.address > address;
	}

	bool operator==(const DebuggerBreakpoint& rhs) const
	{
		return !(address != rhs.address);
	}

	// true for breakpoints that work by replacing the instruction (normal and one-shot)
	bool isExecuteBP() const
	{
		switch (bpType)
		{
		case DEBUGGER_BP_T_NORMAL:
		case DEBUGGER_BP_T_ONE_SHOT:
			return true;
		default:
			return false;
		}
	}

	// true for memory read/write breakpoints
	bool isMemBP() const
	{
		switch (bpType)
		{
		case DEBUGGER_BP_T_MEMORY_READ:
		case DEBUGGER_BP_T_MEMORY_WRITE:
			return true;
		default:
			return false;
		}
	}

	DebuggerBreakpoint* next; // next breakpoint at the same address, nullptr at the end of the list
};
// A memory patch created through debugger_createPatch.
struct DebuggerPatch
{
// start address of the patched range
uint32 address;
// size of the patched range in bytes
sint32 length;
// bytes written by the patch
std::vector<uint8> data;
// bytes that were at the location before it was patched
std::vector<uint8> origData;
};
// Copy of selected PPC register state, filled by debugger_createPPCStateSnapshot.
struct PPCSnapshot
{
uint32 gpr[32];
FPR_t fpr[32];
// copied element by element from the interpreter's cr array
uint8 cr[32];
// link register
uint32 spr_lr;
};
// Complete state of the debugger: breakpoint and patch lists plus the state of the
// current debug session. A single global instance (debuggerState) exists.
typedef struct
{
// if set, debugger_handleEntryBreakpoint creates an execute breakpoint at the address passed to it
bool breakOnEntry;
// breakpoints
// (holds the head of each per-address breakpoint chain, further entries are linked via DebuggerBreakpoint::next)
std::vector<DebuggerBreakpoint*> breakpoints;
std::vector<DebuggerPatch*> patches;
// memory breakpoint currently programmed into the hardware debug registers, nullptr if none
DebuggerBreakpoint* activeMemoryBreakpoint;
// debugging state
// NOTE(review): the volatile flags appear to be the only cross-thread signalling between the window and the trapped thread - confirm
struct
{
volatile bool shouldBreak; // debugger window requests a break asap
volatile bool isTrapped; // if set, breakpoint is active and stepping through the code is possible
uint32 debuggedThreadMPTR;
volatile uint32 instructionPointer;
// CPU instance of the trapped thread, nullptr while not trapped
PPCInterpreter_t* hCPU;
// step control
volatile bool stepOver;
volatile bool stepInto;
volatile bool run;
// snapshot of PPC state
PPCSnapshot ppcSnapshot;
}debugSession;
}debuggerState_t;
// the single global instance, defined in the debugger's source file
extern debuggerState_t debuggerState;
// new API
// returns the head of the breakpoint chain at address, nullptr if there is none
DebuggerBreakpoint* debugger_getFirstBP(uint32 address);
void debugger_toggleExecuteBreakpoint(uint32 address); // create/remove execute breakpoint
void debugger_createExecuteBreakpoint(uint32 address);
// enable/disable bp, which must be part of the chain at address
void debugger_toggleBreakpoint(uint32 address, bool state, DebuggerBreakpoint* bp);
void debugger_createMemoryBreakpoint(uint32 address, bool onRead, bool onWrite);
// creates an execute breakpoint at address if break-on-entry is enabled
void debugger_handleEntryBreakpoint(uint32 address);
// unlinks and deletes bp
void debugger_deleteBreakpoint(DebuggerBreakpoint* bp);
// writes either the trap instruction or the original opcode to address, depending on the breakpoints registered there
void debugger_updateExecutionBreakpoint(uint32 address, bool forceRestore = false);
void debugger_createPatch(uint32 address, std::span<uint8> patchData);
// true if the 4-byte word at address overlaps a recorded patch
bool debugger_hasPatch(uint32 address);
void debugger_forceBreak(); // force breakpoint at the next possible instruction
bool debugger_isTrapped();
void debugger_resume();
// called by a thread that hit a trap; returns once execution should continue
void debugger_enterTW(PPCInterpreter_t* hCPU);
void debugger_shouldBreak(PPCInterpreter_t* hCPU);
// registers r0..r31 (snapshot values) as constants with the expression parser
void debugger_addParserSymbols(class ExpressionParser& ep);

View file

@ -0,0 +1,197 @@
#pragma once
namespace Espresso
{
// Index of each condition bit within a 4-bit condition register field.
enum CR_BIT
{
CR_BIT_INDEX_LT = 0,
CR_BIT_INDEX_GT = 1,
CR_BIT_INDEX_EQ = 2,
CR_BIT_INDEX_SO = 3,
};
// Primary opcode of an instruction, i.e. the value of its top 6 bits (see GetPrimaryOpcode).
// GROUP_* entries are shared by several instructions which are told apart by an extended opcode.
enum class PrimaryOpcode
{
// underscore at the end of the name means that this instruction always updates CR0 (as if RC bit is set)
ZERO = 0,
VIRTUAL_HLE = 1,
// 3 = TWI
GROUP_4 = 4,
MULLI = 7,
SUBFIC = 8,
CMPLI = 10,
CMPI = 11,
ADDIC = 12,
ADDIC_ = 13,
ADDI = 14,
ADDIS = 15,
BC = 16, // conditional branch
GROUP_17 = 17, // SC
B = 18, // unconditional branch
GROUP_19 = 19,
RLWIMI = 20,
RLWINM = 21,
// 22 ?
RLWNM = 23,
ORI = 24,
ORIS = 25,
XORI = 26,
XORIS = 27,
ANDI_ = 28,
ANDIS_ = 29,
GROUP_31 = 31,
LWZ = 32,
LWZU = 33,
LBZ = 34,
LBZU = 35,
STW = 36,
STWU = 37,
STB = 38,
STBU = 39,
LHZ = 40,
LHZU = 41,
LHA = 42,
LHAU = 43,
STH = 44,
STHU = 45,
LMW = 46,
STMW = 47,
LFS = 48,
LFSU = 49,
LFD = 50,
LFDU = 51,
STFS = 52,
STFSU = 53,
STFD = 54,
STFDU = 55,
PSQ_L = 56,
PSQ_LU = 57,
// 58 ?
GROUP_59 = 59,
PSQ_ST = 60,
PSQ_STU = 61,
// 62 ?
GROUP_63 = 63,
};
// Extended opcodes of the instructions that share primary opcode 19 (see GetGroup19Opcode).
enum class Opcode19
{
MCRF = 0,
BCLR = 16,
CRNOR = 33,
RFI = 50,
CRANDC = 129,
ISYNC = 150,
CRXOR = 193,
CRAND = 257,
CREQV = 289,
CRORC = 417,
CROR = 449,
BCCTR = 528
};
// Placeholder for the extended opcodes of primary opcode 31; no values are defined yet.
enum class OPCODE_31
{
};
// Extracts the primary opcode (top 6 bits) from an instruction word.
inline PrimaryOpcode GetPrimaryOpcode(uint32 opcode)
{
	return static_cast<PrimaryOpcode>(opcode >> 26);
}
// Extracts the 10-bit extended opcode (bits 1..10 counted from the LSB) of a group 19 instruction.
inline Opcode19 GetGroup19Opcode(uint32 opcode)
{
	const uint32 extendedOpcode = (opcode >> 1) & 0x3FF;
	return static_cast<Opcode19>(extendedOpcode);
}
// Wrapper around the 5-bit BO field of conditional branch instructions with accessors
// for the individual control bits.
struct BOField
{
	BOField() {};
	BOField(uint8 bo) : bo(bo) {};

	// bit 3 (value 8) clear
	bool conditionInverted() const
	{
		return (bo & 8) == 0;
	}

	// bit 2 (value 4) set
	bool decrementerIgnore() const
	{
		return (bo & 4) != 0;
	}

	// bit 1 (value 2) set
	bool decrementerMustBeZero() const
	{
		return (bo & 2) != 0;
	}

	// bit 4 (value 16) set
	bool conditionIgnore() const
	{
		return (bo & 16) != 0;
	}

	// true if neither the condition nor the decrementer is checked;
	// const so it can be called on const objects like the other accessors
	bool branchAlways() const
	{
		return conditionIgnore() && decrementerIgnore();
	}

	// raw field value; zero-initialized so a default constructed BOField has a defined state
	uint8 bo{};
};
// Decodes an I-form instruction.
// LI receives the sign-extended 26-bit branch displacement (low two bits zero),
// AA is taken from bit 1 and LK from bit 0 of the instruction word.
inline void _decodeForm_I(uint32 opcode, uint32& LI, bool& AA, bool& LK)
{
	uint32 displacement = opcode & 0x3fffffc;
	const bool isNegative = (displacement & 0x02000000) != 0;
	if (isNegative)
		displacement |= 0xfc000000; // sign-extend to 32 bits
	LI = displacement;
	AA = ((opcode >> 1) & 1) != 0;
	LK = ((opcode >> 0) & 1) != 0;
}
// Decodes a conditional branch instruction.
// BD receives the sign-extended 16-bit displacement (low two bits zero), BO and BI the
// two 5-bit fields at bits 21..25 and 16..20, AA bit 1 and LK bit 0.
inline void _decodeForm_D_branch(uint32 opcode, uint32& BD, BOField& BO, uint32& BI, bool& AA, bool& LK)
{
	uint32 displacement = opcode & 0xfffc;
	const bool isNegative = (displacement & 0x8000) != 0;
	if (isNegative)
		displacement |= 0xffff0000; // sign-extend to 32 bits
	BD = displacement;
	BO = { (uint8)((opcode >> 21) & 0x1F) };
	BI = (opcode >> 16) & 0x1F;
	AA = ((opcode >> 1) & 1) != 0;
	LK = ((opcode >> 0) & 1) != 0;
}
// Decodes a D-form instruction with a signed immediate.
// rD and rA receive the register indices, imm the 16-bit immediate sign-extended to 32 bits.
inline void _decodeForm_D_SImm(uint32 opcode, uint32& rD, uint32& rA, uint32& imm)
{
	const sint16 signedImmediate = static_cast<sint16>(opcode & 0xFFFF);
	rD = (opcode >> 21) & 0x1F;
	rA = (opcode >> 16) & 0x1F;
	imm = static_cast<uint32>(static_cast<sint32>(signedImmediate));
}
// Decodes the branch related fields of an XL-form instruction:
// BO (bits 21..25), BI (bits 16..20) and the LK bit (bit 0).
inline void _decodeForm_XL(uint32 opcode, BOField& BO, uint32& BI, bool& LK)
{
	const uint8 boBits = (uint8)((opcode >> 21) & 0x1F);
	BO = { boBits };
	BI = (opcode >> 16) & 0x1F;
	LK = ((opcode >> 0) & 1) != 0;
}
// Decodes ADDI: destination register, source register and sign-extended immediate.
inline void decodeOp_ADDI(uint32 opcode, uint32& rD, uint32& rA, uint32& imm)
{
// form D with signed immediate
_decodeForm_D_SImm(opcode, rD, rA, imm);
}
// Decodes B (unconditional branch): sign-extended displacement plus the AA and LK flags.
inline void decodeOp_B(uint32 opcode, uint32& LI, bool& AA, bool& LK)
{
// form I
_decodeForm_I(opcode, LI, AA, LK);
}
// Decodes BC (conditional branch): sign-extended displacement, BO/BI fields and the AA and LK flags.
inline void decodeOp_BC(uint32 opcode, uint32& BD, BOField& BO, uint32& BI, bool& AA, bool& LK)
{
// form D
_decodeForm_D_branch(opcode, BD, BO, BI, AA, LK);
}
// Decodes BCLR: BO/BI fields and the LK flag.
inline void decodeOp_BCLR(uint32 opcode, BOField& BO, uint32& BI, bool& LK)
{
// form XL (with BD field expected to be zero)
_decodeForm_XL(opcode, BO, BI, LK);
}
// Decodes BCCTR: BO/BI fields and the LK flag.
inline void decodeOp_BCCTR(uint32 opcode, BOField& BO, uint32& BI, bool& LK)
{
// form XL (with BD field expected to be zero)
_decodeForm_XL(opcode, BO, BI, LK);
}
}

View file

@ -0,0 +1,978 @@
// Updates the overflow bits of XER after an overflow-recording instruction.
// On overflow both OV and the sticky SO bit are set; otherwise only OV is cleared
// and SO keeps its previous value.
static void PPCInterpreter_setXerOV(PPCInterpreter_t* hCPU, bool hasOverflow)
{
	if (!hasOverflow)
	{
		hCPU->spr.XER &= ~XER_OV;
		return;
	}
	hCPU->spr.XER |= XER_SO;
	hCPU->spr.XER |= XER_OV;
}
// Returns true if the 32-bit addition of x and y that produced r overflowed in the
// signed sense, i.e. both operands have the same sign and the result has the other one.
static bool checkAdditionOverflow(uint32 x, uint32 y, uint32 r)
{
	// todo - update remaining *O instructions to use this function
	/*
		sign bits:
		x	y	r	result (has overflow)
		0	0	0	0
		1	0	0	0
		0	1	0	0
		1	1	0	1
		0	0	1	1
		1	0	1	0
		0	1	1	0
		1	1	1	0
	*/
	// the sign bit of this value is set only if r differs in sign from both x and y
	const uint32 signMismatch = (x ^ r) & (y ^ r);
	return (signMismatch & 0x80000000) != 0;
}
// ADD: rD = rA + rB, optionally updating CR0.
// The addition is done on the unsigned register values so that wrap-around is well
// defined; the previous cast to int made an overflowing addition undefined behaviour.
static void PPCInterpreter_ADD(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	hCPU->gpr[rD] = hCPU->gpr[rA] + hCPU->gpr[rB];
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// ADDO: rD = rA + rB, recording signed overflow in XER[OV] (and the sticky XER[SO]),
// optionally updating CR0.
// The previous version set OV when the unsigned sum exceeded 32 bits, which is the carry
// condition and not signed overflow (e.g. -1 + 1 was flagged, 0x7FFFFFFF + 1 was not).
static void PPCInterpreter_ADDO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	// untested (Don't Starve Giant Edition uses this instruction + BSO)
	PPC_OPC_TEMPL3_XO();
	const uint32 a = hCPU->gpr[rA];
	const uint32 b = hCPU->gpr[rB];
	const uint32 result = a + b;
	hCPU->gpr[rD] = result;
	// signed overflow: the result differs in sign from both operands
	// (same predicate as checkAdditionOverflow)
	const bool hasOverflow = (((a ^ result) & (b ^ result)) >> 31) != 0;
	if (hasOverflow)
	{
		hCPU->spr.XER |= XER_SO;
		hCPU->spr.XER |= XER_OV;
	}
	else
	{
		hCPU->spr.XER &= ~XER_OV;
	}
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// ADDC: rD = rA + rB, recording the carry out in XER[CA], optionally updating CR0.
static void PPCInterpreter_ADDC(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 lhs = hCPU->gpr[rA];
	const uint32 sum = lhs + hCPU->gpr[rB];
	hCPU->gpr[rD] = sum;
	// the unsigned sum wrapped around exactly when it is smaller than an operand
	hCPU->xer_ca = (sum < lhs) ? 1 : 0;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// ADDCO: rD = rA + rB, recording the carry out in XER[CA] and signed overflow in
// XER[OV] (plus the sticky XER[SO]), optionally updating CR0.
// The previous version reused the carry condition for OV; carry and signed overflow
// are different conditions.
static void PPCInterpreter_ADDCO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 a = hCPU->gpr[rA];
	const uint32 b = hCPU->gpr[rB];
	const uint32 result = a + b;
	hCPU->gpr[rD] = result;
	// carry: the unsigned sum wrapped around
	if (result < a)
		hCPU->xer_ca = 1;
	else
		hCPU->xer_ca = 0;
	// set SO/OV on signed overflow: the result differs in sign from both operands
	// (same predicate as checkAdditionOverflow)
	const bool hasOverflow = (((a ^ result) & (b ^ result)) >> 31) != 0;
	if (hasOverflow)
	{
		hCPU->spr.XER |= XER_OV;
		hCPU->spr.XER |= XER_SO;
	}
	else
		hCPU->spr.XER &= ~XER_OV;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// ADDE: rD = rA + rB + XER[CA], recording the new carry in XER[CA], optionally updating CR0.
static void PPCInterpreter_ADDE(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 lhs = hCPU->gpr[rA];
	const uint32 rhs = hCPU->gpr[rB];
	const uint32 carryIn = hCPU->xer_ca;
	hCPU->gpr[rD] = lhs + rhs + carryIn;
	// update xer
	hCPU->xer_ca = ppc_carry_3(lhs, rhs, carryIn) ? 1 : 0;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// ADDEO: rD = rA + rB + XER[CA], recording carry in XER[CA] and signed overflow in
// XER[OV]/XER[SO], optionally updating CR0.
static void PPCInterpreter_ADDEO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	// used by DS Virtual Console (Super Mario 64 DS)
	PPC_OPC_TEMPL3_XO();
	const uint32 lhs = hCPU->gpr[rA];
	const uint32 rhs = hCPU->gpr[rB];
	const uint32 carryIn = hCPU->xer_ca;
	hCPU->gpr[rD] = lhs + rhs + carryIn;
	// update xer carry
	hCPU->xer_ca = ppc_carry_3(lhs, rhs, carryIn) ? 1 : 0;
	// update xer overflow
	const bool hasOverflow = checkAdditionOverflow(lhs, rhs, hCPU->gpr[rD]);
	PPCInterpreter_setXerOV(hCPU, hasOverflow);
	// update CR
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// ADDI: rD = (rA|0) + sign-extended immediate. Register index 0 for rA means the constant 0.
// The addition is done on unsigned values so that wrap-around is well defined; the
// previous cast to int made an overflowing addition undefined behaviour.
static void PPCInterpreter_ADDI(PPCInterpreter_t* hCPU, uint32 opcode)
{
	sint32 rD, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
	hCPU->gpr[rD] = (rA ? hCPU->gpr[rA] : 0) + imm;
	PPCInterpreter_nextInstruction(hCPU);
}
// ADDIC: rD = rA + sign-extended immediate, recording the carry out in XER[CA].
static void PPCInterpreter_ADDIC(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rD, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
	const uint32 lhs = hCPU->gpr[rA];
	const uint32 sum = lhs + imm;
	hCPU->gpr[rD] = sum;
	// update XER (carry occurred if the unsigned sum wrapped around)
	hCPU->xer_ca = (sum < lhs) ? 1 : 0;
	PPCInterpreter_nextInstruction(hCPU);
}
// ADDIC.: rD = rA + sign-extended immediate, recording the carry out in XER[CA]
// and always updating CR0.
static void PPCInterpreter_ADDIC_(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rD, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
	const uint32 lhs = hCPU->gpr[rA];
	const uint32 sum = lhs + imm;
	hCPU->gpr[rD] = sum;
	// update XER (carry occurred if the unsigned sum wrapped around)
	hCPU->xer_ca = (sum < lhs) ? 1 : 0;
	// update cr0 flags
	ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// ADDIS: rD = (rA|0) + shifted immediate. Register index 0 for rA means the constant 0.
static void PPCInterpreter_ADDIS(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rD, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_Shift16(opcode, rD, rA, imm);
	uint32 base = 0;
	if (rA)
		base = hCPU->gpr[rA];
	hCPU->gpr[rD] = base + imm;
	PPCInterpreter_nextInstruction(hCPU);
}
// ADDZE: rD = rA + XER[CA], recording the new carry in XER[CA], optionally updating CR0.
static void PPCInterpreter_ADDZE(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	PPC_ASSERT(rB == 0);
	const uint32 value = hCPU->gpr[rA];
	const uint32 carryIn = hCPU->xer_ca;
	hCPU->gpr[rD] = value + carryIn;
	// a carry is only possible when adding the carry bit to the all-ones value
	const bool carryOut = (value == 0xffffffff) && carryIn;
	hCPU->xer_ca = carryOut ? 1 : 0;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// ADDME: rD = rA + XER[CA] + 0xFFFFFFFF (i.e. minus one), recording the new carry in
// XER[CA], optionally updating CR0.
static void PPCInterpreter_ADDME(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	PPC_ASSERT(rB == 0);
	const uint32 value = hCPU->gpr[rA];
	const uint32 carryIn = hCPU->xer_ca;
	hCPU->gpr[rD] = value + carryIn + 0xffffffff;
	// adding the all-ones value carries unless the other two addends are both zero
	const bool carryOut = value || carryIn;
	hCPU->xer_ca = carryOut ? 1 : 0;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// SUBF: rD = rB - rA, computed as ~rA + rB + 1, optionally updating CR0.
static void PPCInterpreter_SUBF(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 invertedA = ~hCPU->gpr[rA];
	const uint32 difference = invertedA + hCPU->gpr[rB] + 1;
	hCPU->gpr[rD] = difference;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// SUBFO: rD = rB - rA (computed as ~rA + rB + 1), recording signed overflow in
// XER[OV]/XER[SO], optionally updating CR0.
// The operands are latched before rD is written. The previous version re-read rA and rB
// after the store, which produced a wrong overflow flag whenever rD was the same register
// as rA or rB.
static void PPCInterpreter_SUBFO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	// untested (Don't Starve Giant Edition uses this)
	// also used by DS Virtual Console (Super Mario 64 DS)
	PPC_OPC_TEMPL3_XO();
	const uint32 a = hCPU->gpr[rA];
	const uint32 b = hCPU->gpr[rB];
	const uint32 result = ~a + b + 1;
	hCPU->gpr[rD] = result;
	PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~a, b, result));
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// SUBFC: rD = rB - rA (computed as ~rA + rB + 1), recording the carry out in XER[CA],
// optionally updating CR0.
static void PPCInterpreter_SUBFC(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 subtrahend = hCPU->gpr[rA];
	const uint32 minuend = hCPU->gpr[rB];
	hCPU->gpr[rD] = ~subtrahend + minuend + 1;
	// update xer
	hCPU->xer_ca = ppc_carry_3(~subtrahend, minuend, 1) ? 1 : 0;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// SUBFCO: rD = rB - rA (computed as ~rA + rB + 1), recording the carry out in XER[CA]
// and signed overflow in XER[OV]/XER[SO], optionally updating CR0.
static void PPCInterpreter_SUBFCO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	// used by DS Virtual Console (Super Mario 64 DS)
	PPC_OPC_TEMPL3_XO();
	const uint32 subtrahend = hCPU->gpr[rA];
	const uint32 minuend = hCPU->gpr[rB];
	hCPU->gpr[rD] = ~subtrahend + minuend + 1;
	// update xer
	hCPU->xer_ca = ppc_carry_3(~subtrahend, minuend, 1) ? 1 : 0;
	// update xer SO/OV
	const bool hasOverflow = checkAdditionOverflow(~subtrahend, minuend, hCPU->gpr[rD]);
	if (!hasOverflow)
	{
		hCPU->spr.XER &= ~XER_OV;
	}
	else
	{
		hCPU->spr.XER |= XER_SO;
		hCPU->spr.XER |= XER_OV;
	}
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// SUBFIC: rD = immediate - rA (computed as ~rA + imm + 1), recording the carry out in XER[CA].
static void PPCInterpreter_SUBFIC(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rD, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
	const uint32 subtrahend = hCPU->gpr[rA];
	hCPU->gpr[rD] = ~subtrahend + imm + 1;
	hCPU->xer_ca = ppc_carry_3(~subtrahend, imm, 1) ? 1 : 0;
	PPCInterpreter_nextInstruction(hCPU);
}
// SUBFE: rD = ~rA + rB + XER[CA], recording the new carry in XER[CA], optionally updating CR0.
static void PPCInterpreter_SUBFE(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 subtrahend = hCPU->gpr[rA];
	const uint32 minuend = hCPU->gpr[rB];
	const uint32 carryIn = hCPU->xer_ca;
	hCPU->gpr[rD] = ~subtrahend + minuend + carryIn;
	// update xer carry
	hCPU->xer_ca = ppc_carry_3(~subtrahend, minuend, carryIn) ? 1 : 0;
	// update cr0
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// SUBFEO: rD = ~rA + rB + XER[CA], recording the new carry in XER[CA] and signed
// overflow in XER[OV]/XER[SO], optionally updating CR0.
static void PPCInterpreter_SUBFEO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 subtrahend = hCPU->gpr[rA];
	const uint32 minuend = hCPU->gpr[rB];
	const uint32 carryIn = hCPU->xer_ca;
	const uint32 difference = ~subtrahend + minuend + carryIn;
	hCPU->gpr[rD] = difference;
	// update xer carry
	hCPU->xer_ca = ppc_carry_3(~subtrahend, minuend, carryIn) ? 1 : 0;
	// update xer SO/OV
	const bool hasOverflow = checkAdditionOverflow(~subtrahend, minuend, difference);
	if (!hasOverflow)
	{
		hCPU->spr.XER &= ~XER_OV;
	}
	else
	{
		hCPU->spr.XER |= XER_SO;
		hCPU->spr.XER |= XER_OV;
	}
	// update cr0
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// SUBFZE: rD = ~rA + XER[CA], recording the new carry in XER[CA], optionally updating CR0.
static void PPCInterpreter_SUBFZE(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	PPC_ASSERT(rB == 0);
	const uint32 value = hCPU->gpr[rA];
	const uint32 carryIn = hCPU->xer_ca;
	hCPU->gpr[rD] = ~value + carryIn;
	// ~value is all ones only for value == 0, which is the only case where adding the carry bit wraps
	const bool carryOut = (value == 0) && carryIn;
	hCPU->xer_ca = carryOut ? 1 : 0;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Subtract-from minus-one extended: gpr[rD] = ~gpr[rA] + 0xFFFFFFFF + xer_ca.
// xer_ca receives the carry-out; CR0 is updated when the opcode has PPC_OPC_RC set.
// Marked "untested" by the original author.
static void PPCInterpreter_SUBFME(PPCInterpreter_t* hCPU, uint32 opcode)
{
	// untested
	PPC_OPC_TEMPL3_XO();
	PPC_ASSERT(rB == 0);
	const uint32 invertedA = ~hCPU->gpr[rA];
	const uint32 carryIn = hCPU->xer_ca;
	hCPU->gpr[rD] = invertedA + 0xFFFFFFFF + carryIn;
	hCPU->xer_ca = ppc_carry_3(invertedA, 0xFFFFFFFF, carryIn) ? 1 : 0;
	if (opcode & PPC_OPC_RC)
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Signed multiply high word: gpr[rD] = upper 32 bits of the signed 64-bit product gpr[rA] * gpr[rB].
// When PPC_OPC_RC is set, CR0 is updated; non-PUBLIC_RELEASE builds additionally hit assert_dbg() on that path.
static void PPCInterpreter_MULHW_(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	// sign-extend both 32-bit operands before multiplying
	const sint64 product = (sint64)(sint32)hCPU->gpr[rA] * (sint64)(sint32)hCPU->gpr[rB];
	hCPU->gpr[rD] = ((uint64)product) >> 32;
	if (opcode & PPC_OPC_RC)
	{
		// update cr0 flags
#ifndef PUBLIC_RELEASE
		assert_dbg();
#endif
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	}
	PPCInterpreter_nextInstruction(hCPU);
}
// Unsigned multiply high word: gpr[rD] = upper 32 bits of the unsigned 64-bit product gpr[rA] * gpr[rB].
// CR0 is updated when opHasRC().
static void PPCInterpreter_MULHWU_(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint64 product = (uint64)hCPU->gpr[rA] * (uint64)hCPU->gpr[rB];
	hCPU->gpr[rD] = product >> 32;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Multiply low word: gpr[rD] = low 32 bits of gpr[rA] * gpr[rB] (computed in 64 bits, then truncated).
// CR0 is updated when opHasRC().
static void PPCInterpreter_MULLW(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const sint64 factorA = (sint64)hCPU->gpr[rA];
	const sint64 factorB = (sint64)hCPU->gpr[rB];
	const sint64 product = factorA * factorB;
	hCPU->gpr[rD] = (uint32)product;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Multiply low word with overflow recording.
// gpr[rD] receives the low 32 bits of the signed product gpr[rA] * gpr[rB].
// XER SO+OV are set when the full signed product does not fit into 32 bits; otherwise OV is cleared
// (SO is sticky and left untouched). CR0 is updated when opHasRC().
static void PPCInterpreter_MULLWO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	// Don't Starve Giant Edition uses this instruction + BSO
	// also used by FullBlast when a save file exists + it uses mfxer to access overflow result
	PPC_OPC_TEMPL3_XO();
	// The operands must be sign-extended: casting the unsigned register value straight to sint64
	// zero-extends it, which leaves the low 32 bits correct but makes the signed range check meaningless.
	const sint64 result = (sint64)(sint32)hCPU->gpr[rA] * (sint64)(sint32)hCPU->gpr[rB];
	hCPU->gpr[rD] = (uint32)result;
	// overflow = product outside [-2^31, 2^31-1]; the two bounds must be OR-ed
	// (the previous '&&' could never be true, so OV/SO were never set)
	if (result < -0x80000000ll || result > 0x7FFFFFFFLL)
	{
		hCPU->spr.XER |= XER_SO;
		hCPU->spr.XER |= XER_OV;
	}
	else
	{
		hCPU->spr.XER &= ~XER_OV;
	}
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Multiply low immediate: gpr[rD] = gpr[rA] * imm (32-bit wrap-around). No flags are updated.
static void PPCInterpreter_MULLI(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rD, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
	const uint32 product = hCPU->gpr[rA] * imm;
	hCPU->gpr[rD] = product;
	PPCInterpreter_nextInstruction(hCPU);
}
// Signed divide word: gpr[rD] = (sint32)gpr[rA] / (sint32)gpr[rB].
// Division by zero is logged and then evaluated as a division by 1 (existing behavior, kept as-is).
// 0x80000000 / -1 is handled explicitly because the host division would overflow
// (undefined behavior in C++, and a hardware trap on x86 hosts).
// CR0 is updated when opHasRC().
static void PPCInterpreter_DIVW(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	sint32 a = hCPU->gpr[rA];
	sint32 b = hCPU->gpr[rB];
	if (b == 0)
	{
		forceLogDebug_printf("Error: Division by zero! [%08X]\n", (uint32)hCPU->instructionPointer);
		b++;
	}
	if ((uint32)a == 0x80000000 && b == -1)
	{
		// The quotient (+2^31) is not representable. The architecture leaves rD undefined for this case;
		// 0xFFFFFFFF is used here, which is what other emulators report for real hardware - TODO confirm on Espresso.
		hCPU->gpr[rD] = 0xFFFFFFFF;
	}
	else
	{
		hCPU->gpr[rD] = a / b;
	}
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Signed divide word with overflow recording.
// On division by zero, or on 0x80000000 / -1 (quotient not representable), gpr[rD] is left unchanged,
// XER OV is set when PPC_OPC_OE is present in the opcode and CR0 is not updated.
// Otherwise gpr[rD] = a / b, OV is cleared (when OE) and CR0 is updated when opHasRC().
static void PPCInterpreter_DIVWO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const sint32 a = hCPU->gpr[rA];
	const sint32 b = hCPU->gpr[rB];
	// 0x80000000 / -1 must not reach the host division: it is undefined behavior in C++ and traps on x86
	const bool isOverflow = (b == 0) || ((uint32)a == 0x80000000 && b == -1);
	if (isOverflow)
	{
		if (opcode & PPC_OPC_OE)
			hCPU->spr.XER |= XER_OV;
		PPCInterpreter_nextInstruction(hCPU);
		return;
	}
	hCPU->gpr[rD] = a / b;
	if (opcode & PPC_OPC_OE)
		hCPU->spr.XER &= ~XER_OV;
	// todo: Handle SO
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Unsigned divide word: gpr[rD] = gpr[rA] / gpr[rB].
// When the divisor is zero the instruction is skipped entirely (rD and CR0 are left unchanged).
static void PPCInterpreter_DIVWU(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 divisor = hCPU->gpr[rB];
	if (divisor != 0)
	{
		hCPU->gpr[rD] = hCPU->gpr[rA] / divisor;
		if (opHasRC())
			ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	}
	PPCInterpreter_nextInstruction(hCPU);
}
// Unsigned divide word with overflow recording.
// Division by zero: XER OV is set (when PPC_OPC_OE is present), rD and CR0 stay unchanged.
// Otherwise gpr[rD] = gpr[rA] / gpr[rB], OV is cleared (when OE) and CR0 is updated when opHasRC().
static void PPCInterpreter_DIVWUO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 divisor = hCPU->gpr[rB];
	const bool recordsOverflow = (opcode & PPC_OPC_OE) != 0;
	if (divisor == 0)
	{
		if (recordsOverflow)
			hCPU->spr.XER |= XER_OV;
		PPCInterpreter_nextInstruction(hCPU);
		return;
	}
	hCPU->gpr[rD] = hCPU->gpr[rA] / divisor;
	if (recordsOverflow)
		hCPU->spr.XER &= ~XER_OV;
	// todo: Handle SO
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Condition register equivalence: CR[crD] = CR[crA] XOR CR[crB] XOR 1.
static void PPCInterpreter_CREQV(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL_X_CR();
	const auto bitA = ppc_getCRBit(hCPU, crA);
	const auto bitB = ppc_getCRBit(hCPU, crB);
	ppc_setCRBit(hCPU, crD, bitA ^ bitB ^ 1);
	PPCInterpreter_nextInstruction(hCPU);
}
// Condition register AND: CR[crD] = CR[crA] & CR[crB].
static void PPCInterpreter_CRAND(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL_X_CR();
	const auto bitA = ppc_getCRBit(hCPU, crA);
	const auto bitB = ppc_getCRBit(hCPU, crB);
	ppc_setCRBit(hCPU, crD, bitA & bitB);
	PPCInterpreter_nextInstruction(hCPU);
}
// Condition register AND with complement: CR[crD] = CR[crA] & (CR[crB] XOR 1).
static void PPCInterpreter_CRANDC(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL_X_CR();
	const auto bitA = ppc_getCRBit(hCPU, crA);
	const auto bitB = ppc_getCRBit(hCPU, crB);
	ppc_setCRBit(hCPU, crD, bitA & (bitB ^ 1));
	PPCInterpreter_nextInstruction(hCPU);
}
// Condition register OR: CR[crD] = CR[crA] | CR[crB].
static void PPCInterpreter_CROR(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL_X_CR();
	const auto bitA = ppc_getCRBit(hCPU, crA);
	const auto bitB = ppc_getCRBit(hCPU, crB);
	ppc_setCRBit(hCPU, crD, bitA | bitB);
	PPCInterpreter_nextInstruction(hCPU);
}
// Condition register OR with complement: CR[crD] = CR[crA] | (CR[crB] XOR 1).
static void PPCInterpreter_CRORC(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL_X_CR();
	const auto bitA = ppc_getCRBit(hCPU, crA);
	const auto bitB = ppc_getCRBit(hCPU, crB);
	ppc_setCRBit(hCPU, crD, bitA | (bitB ^ 1));
	PPCInterpreter_nextInstruction(hCPU);
}
// Condition register NOR: CR[crD] = (CR[crA] | CR[crB]) XOR 1.
static void PPCInterpreter_CRNOR(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL_X_CR();
	const auto bitA = ppc_getCRBit(hCPU, crA);
	const auto bitB = ppc_getCRBit(hCPU, crB);
	ppc_setCRBit(hCPU, crD, (bitA | bitB) ^ 1);
	PPCInterpreter_nextInstruction(hCPU);
}
// Condition register XOR: CR[crD] = CR[crA] ^ CR[crB].
static void PPCInterpreter_CRXOR(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL_X_CR();
	const auto bitA = ppc_getCRBit(hCPU, crA);
	const auto bitB = ppc_getCRBit(hCPU, crB);
	ppc_setCRBit(hCPU, crD, bitA ^ bitB);
	PPCInterpreter_nextInstruction(hCPU);
}
// Negate: gpr[rD] = two's complement negation of gpr[rA]. CR0 is updated when opHasRC().
static void PPCInterpreter_NEG(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	PPC_ASSERT(rB == 0);
	// Negate in unsigned arithmetic: negating (sint32)0x80000000 is signed overflow (undefined behavior),
	// while 0 - x on uint32 is well-defined and yields the same bit pattern for every input.
	const uint32 value = hCPU->gpr[rA];
	hCPU->gpr[rD] = 0u - value;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Negate with overflow recording: gpr[rD] = two's complement negation of gpr[rA].
// The only overflowing input is 0x80000000: XER SO+OV are set for it, otherwise OV is cleared.
// CR0 is updated when opHasRC().
static void PPCInterpreter_NEGO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	PPC_ASSERT(rB == 0);
	const uint32 value = hCPU->gpr[rA];
	if (value == 0x80000000)
	{
		hCPU->spr.XER |= XER_SO;
		hCPU->spr.XER |= XER_OV;
	}
	else
	{
		hCPU->spr.XER &= ~XER_OV;
	}
	// Negate in unsigned arithmetic: negating (sint32)0x80000000 is signed overflow (undefined behavior),
	// while 0 - x on uint32 is well-defined and yields the same bit pattern for every input.
	hCPU->gpr[rD] = 0u - value;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Bitwise AND: gpr[rA] = gpr[rD] & gpr[rB]. CR0 is updated when opHasRC().
// Note the destination is rA; the register index decoded as rD is a source here.
static void PPCInterpreter_ANDX(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 source = hCPU->gpr[rD];
	const uint32 operand = hCPU->gpr[rB];
	hCPU->gpr[rA] = source & operand;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Bitwise AND with complement: gpr[rA] = gpr[rD] & ~gpr[rB]. CR0 is updated when opHasRC().
static void PPCInterpreter_ANDCX(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 source = hCPU->gpr[rD];
	const uint32 invertedOperand = ~hCPU->gpr[rB];
	hCPU->gpr[rA] = source & invertedOperand;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// AND with immediate (decoded by PPC_OPC_TEMPL_D_UImm): gpr[rA] = gpr[rS] & imm.
// CR0 is always updated by this handler.
static void PPCInterpreter_ANDI_(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rS, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm);
	const uint32 masked = hCPU->gpr[rS] & imm;
	hCPU->gpr[rA] = masked;
	ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// AND with immediate (decoded by PPC_OPC_TEMPL_D_Shift16): gpr[rA] = gpr[rS] & imm.
// CR0 is always updated by this handler.
static void PPCInterpreter_ANDIS_(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rS, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm);
	const uint32 masked = hCPU->gpr[rS] & imm;
	hCPU->gpr[rA] = masked;
	ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Bitwise NAND: gpr[rA] = ~(gpr[rD] & gpr[rB]). CR0 is updated when opHasRC().
static void PPCInterpreter_NANDX(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 source = hCPU->gpr[rD];
	const uint32 operand = hCPU->gpr[rB];
	hCPU->gpr[rA] = ~(source & operand);
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Bitwise OR: gpr[rA] = gpr[rD] | gpr[rB]. CR0 is updated when opHasRC().
static void PPCInterpreter_OR(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 source = hCPU->gpr[rD];
	const uint32 operand = hCPU->gpr[rB];
	hCPU->gpr[rA] = source | operand;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Bitwise OR with complement: gpr[rA] = gpr[rD] | ~gpr[rB]. CR0 is updated when opHasRC().
static void PPCInterpreter_ORC(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 source = hCPU->gpr[rD];
	const uint32 invertedOperand = ~hCPU->gpr[rB];
	hCPU->gpr[rA] = source | invertedOperand;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// OR with immediate (decoded by PPC_OPC_TEMPL_D_UImm): gpr[rA] = gpr[rS] | imm. No flags are updated.
static void PPCInterpreter_ORI(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rS, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm);
	const uint32 combined = hCPU->gpr[rS] | imm;
	hCPU->gpr[rA] = combined;
	PPCInterpreter_nextInstruction(hCPU);
}
// OR with immediate (decoded by PPC_OPC_TEMPL_D_Shift16): gpr[rA] = gpr[rS] | imm. No flags are updated.
static void PPCInterpreter_ORIS(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rS, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm);
	const uint32 combined = hCPU->gpr[rS] | imm;
	hCPU->gpr[rA] = combined;
	PPCInterpreter_nextInstruction(hCPU);
}
// Bitwise NOR: gpr[rA] = ~(gpr[rD] | gpr[rB]). CR0 is updated when opHasRC().
static void PPCInterpreter_NORX(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 source = hCPU->gpr[rD];
	const uint32 operand = hCPU->gpr[rB];
	hCPU->gpr[rA] = ~(source | operand);
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Bitwise XOR: gpr[rA] = gpr[rD] ^ gpr[rB]. CR0 is updated when opHasRC().
static void PPCInterpreter_XOR(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 source = hCPU->gpr[rD];
	const uint32 operand = hCPU->gpr[rB];
	hCPU->gpr[rA] = source ^ operand;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// XOR with immediate (decoded by PPC_OPC_TEMPL_D_UImm): gpr[rA] = gpr[rS] ^ imm. No flags are updated.
static void PPCInterpreter_XORI(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rS, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm);
	const uint32 toggled = hCPU->gpr[rS] ^ imm;
	hCPU->gpr[rA] = toggled;
	PPCInterpreter_nextInstruction(hCPU);
}
// XOR with immediate (decoded by PPC_OPC_TEMPL_D_Shift16): gpr[rA] = gpr[rS] ^ imm. No flags are updated.
static void PPCInterpreter_XORIS(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rS, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm);
	const uint32 toggled = hCPU->gpr[rS] ^ imm;
	hCPU->gpr[rA] = toggled;
	PPCInterpreter_nextInstruction(hCPU);
}
// Bitwise equivalence (XNOR): gpr[rA] = ~(gpr[rD] ^ gpr[rB]). CR0 is updated when opHasRC().
static void PPCInterpreter_EQV(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 source = hCPU->gpr[rD];
	const uint32 operand = hCPU->gpr[rB];
	hCPU->gpr[rA] = ~(source ^ operand);
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Rotate left then insert under mask: gpr[rS] is rotated left by SH and the bits selected by
// ppc_mask(MB, ME) replace the corresponding bits of gpr[rA]; all other bits of gpr[rA] are kept.
// CR0 is updated when opHasRC().
static void PPCInterpreter_RLWIMI(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rS, rA, SH, MB, ME;
	PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME);
	const uint32 rotated = ppc_word_rotl(hCPU->gpr[rS], SH);
	const uint32 insertMask = ppc_mask(MB, ME);
	const uint32 preserved = hCPU->gpr[rA] & ~insertMask;
	hCPU->gpr[rA] = (rotated & insertMask) | preserved;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Rotate left by immediate then AND with mask: gpr[rA] = rotl(gpr[rS], SH) & ppc_mask(MB, ME).
// CR0 is updated when opHasRC().
static void PPCInterpreter_RLWINM(PPCInterpreter_t* hCPU, uint32 opcode)
{
	sint32 rS, rA, SH, MB, ME;
	PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME);
	const uint32 rotated = ppc_word_rotl(hCPU->gpr[rS], SH);
	const uint32 keepMask = ppc_mask(MB, ME);
	hCPU->gpr[rA] = rotated & keepMask;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Rotate left by register then AND with mask: gpr[rA] = rotl(gpr[rS], gpr[rB]) & ppc_mask(MB, ME).
// The rotate amount is passed unmasked to ppc_word_rotl. CR0 is updated when opHasRC().
static void PPCInterpreter_RLWNM(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rS, rA, rB, MB, ME;
	PPC_OPC_TEMPL_M(opcode, rS, rA, rB, MB, ME);
	const uint32 rotated = ppc_word_rotl(hCPU->gpr[rS], hCPU->gpr[rB]);
	const uint32 keepMask = ppc_mask(MB, ME);
	hCPU->gpr[rA] = rotated & keepMask;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Shift left word: gpr[rA] = gpr[rD] << (gpr[rB] & 0x3f); shift amounts above 31 produce 0.
// CR0 is updated when opHasRC().
static void PPCInterpreter_SLWX(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 shiftAmount = hCPU->gpr[rB] & 0x3f;
	// a host shift by 32 or more would be undefined, so that range is handled explicitly
	hCPU->gpr[rA] = (shiftAmount > 31) ? 0 : (hCPU->gpr[rD] << shiftAmount);
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Shift right algebraic word: gpr[rA] = (sint32)gpr[rD] >> (gpr[rB] & 0x3f), saturating the shift at 31.
// xer_ca is set when the source is negative and at least one 1-bit is shifted out
// (for shift amounts above 31 it is simply the sign bit). CR0 is updated when opHasRC().
static void PPCInterpreter_SRAW(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 shiftAmount = hCPU->gpr[rB] & 0x3f;
	const uint32 source = hCPU->gpr[rD];
	const uint32 signBit = (source >> 31) & 1;
	if (shiftAmount > 31)
	{
		// every bit of the result is a copy of the sign bit
		hCPU->xer_ca = signBit;
		hCPU->gpr[rA] = (uint32)((sint32)source >> 31);
	}
	else
	{
		// bits that fall off the low end of the word
		const uint32 droppedBits = source & ~(0xFFFFFFFF << shiftAmount);
		hCPU->xer_ca = (signBit != 0 && droppedBits != 0) ? 1 : 0;
		hCPU->gpr[rA] = (uint32)((sint32)source >> shiftAmount);
	}
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Shift right word (logical): gpr[rA] = gpr[rD] >> (gpr[rB] & 0x3f); shift amounts above 31 produce 0.
// CR0 is updated when opHasRC().
static void PPCInterpreter_SRWX(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 shiftAmount = hCPU->gpr[rB] & 0x3f;
	// a host shift by 32 or more would be undefined, so that range is handled explicitly
	hCPU->gpr[rA] = (shiftAmount > 31) ? 0 : (hCPU->gpr[rD] >> shiftAmount);
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Shift right algebraic word by immediate: gpr[rA] = gpr[rS] arithmetically shifted right by SH.
// xer_ca is set only when the source is negative and at least one 1-bit was shifted out.
// CR0 is updated when opHasRC().
static void PPCInterpreter_SRAWI(PPCInterpreter_t* hCPU, uint32 opcode)
{
	sint32 rS, rA;
	uint32 SH;
	PPC_OPC_TEMPL_X(opcode, rS, rA, SH);
	uint32 value = hCPU->gpr[rS];
	uint32 shiftedOutOne = 0;
	if (value & 0x80000000)
	{
		// negative input: shift one bit at a time, refilling the top bit and tracking dropped 1-bits
		for (uint32 step = 0; step < SH; step++)
		{
			shiftedOutOne |= (value & 1);
			value = (value >> 1) | 0x80000000;
		}
	}
	else
	{
		// non-negative input: plain logical shift, carry stays clear
		value = (SH > 31) ? 0 : (value >> SH);
	}
	hCPU->gpr[rA] = value;
	hCPU->xer_ca = shiftedOutOne ? 1 : 0;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Counts the leading zero bits of a 32-bit value. Returns 32 for an input of 0.
static uint32 _CNTLZW(uint32 v)
{
	if (v == 0)
		return 32;
	// binary search for the index of the highest set bit (halving the window each step)
	uint32 highestBitIndex = 0;
	for (uint32 step = 16; step != 0; step >>= 1)
	{
		if ((v >> step) != 0)
		{
			highestBitIndex |= step;
			v >>= step;
		}
	}
	return 31 - highestBitIndex;
}
// Count leading zeros word: gpr[rA] = _CNTLZW(gpr[rD]). CR0 is updated when opHasRC().
static void PPCInterpreter_CNTLZW(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	PPC_ASSERT(rB == 0);
	const uint32 leadingZeros = _CNTLZW(hCPU->gpr[rD]);
	hCPU->gpr[rA] = leadingZeros;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Extend sign byte: gpr[rA] = low 8 bits of gpr[rD], sign-extended to 32 bits. CR0 is updated when opHasRC().
static void PPCInterpreter_EXTSB(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	PPC_ASSERT(rB == 0);
	const sint8 lowByte = (sint8)hCPU->gpr[rD];
	hCPU->gpr[rA] = (uint32)(sint32)lowByte;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Extend sign halfword: gpr[rA] = low 16 bits of gpr[rD], sign-extended to 32 bits. CR0 is updated when opHasRC().
static void PPCInterpreter_EXTSH(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	PPC_ASSERT(rB == 0);
	const sint16 lowHalf = (sint16)hCPU->gpr[rD];
	hCPU->gpr[rA] = (uint32)(sint32)lowHalf;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
// Signed compare of gpr[rA] with gpr[rB].
// The four hCPU->cr[] entries of the selected field (index = decoded value >> 2) are cleared, then exactly one of
// LT/GT/EQ is set, and SO is set when XER_SO is set in XER.
static void PPCInterpreter_CMP(PPCInterpreter_t* hCPU, uint32 opcode)
{
	uint32 cr;
	sint32 rA, rB;
	PPC_OPC_TEMPL_X(opcode, cr, rA, rB);
	cr >>= 2;
	const uint32 crBase = cr * 4;
	const sint32 lhs = hCPU->gpr[rA];
	const sint32 rhs = hCPU->gpr[rB];
	for (uint32 bit = 0; bit < 4; bit++)
		hCPU->cr[crBase + bit] = 0;
	if (lhs < rhs)
		hCPU->cr[crBase + CR_BIT_LT] = 1;
	else if (lhs > rhs)
		hCPU->cr[crBase + CR_BIT_GT] = 1;
	else
		hCPU->cr[crBase + CR_BIT_EQ] = 1;
	if ((hCPU->spr.XER & XER_SO) != 0)
		hCPU->cr[crBase + CR_BIT_SO] = 1;
	PPCInterpreter_nextInstruction(hCPU);
}
// Unsigned (logical) compare of gpr[rA] with gpr[rB].
// The four hCPU->cr[] entries of the selected field (index = decoded value >> 2) are cleared, then exactly one of
// LT/GT/EQ is set, and SO is set when XER_SO is set in XER.
static void PPCInterpreter_CMPL(PPCInterpreter_t* hCPU, uint32 opcode)
{
	uint32 cr;
	int rA, rB;
	PPC_OPC_TEMPL_X(opcode, cr, rA, rB);
	cr >>= 2;
	const uint32 crBase = cr * 4;
	const uint32 lhs = hCPU->gpr[rA];
	const uint32 rhs = hCPU->gpr[rB];
	for (uint32 bit = 0; bit < 4; bit++)
		hCPU->cr[crBase + bit] = 0;
	if (lhs < rhs)
		hCPU->cr[crBase + CR_BIT_LT] = 1;
	else if (lhs > rhs)
		hCPU->cr[crBase + CR_BIT_GT] = 1;
	else
		hCPU->cr[crBase + CR_BIT_EQ] = 1;
	if ((hCPU->spr.XER & XER_SO) != 0)
		hCPU->cr[crBase + CR_BIT_SO] = 1;
	PPCInterpreter_nextInstruction(hCPU);
}
// Signed compare of gpr[rA] with the immediate decoded by PPC_OPC_TEMPL_D_SImm.
// The four hCPU->cr[] entries of the selected field (index = decoded value >> 2) are cleared, then exactly one of
// LT/GT/EQ is set, and SO is set when XER_SO is set in XER.
static void PPCInterpreter_CMPI(PPCInterpreter_t* hCPU, uint32 opcode)
{
	uint32 cr;
	int rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(opcode, cr, rA, imm);
	cr >>= 2;
	const uint32 crBase = cr * 4;
	const sint32 lhs = hCPU->gpr[rA];
	const sint32 rhs = imm;
	for (uint32 bit = 0; bit < 4; bit++)
		hCPU->cr[crBase + bit] = 0;
	if (lhs < rhs)
		hCPU->cr[crBase + CR_BIT_LT] = 1;
	else if (lhs > rhs)
		hCPU->cr[crBase + CR_BIT_GT] = 1;
	else
		hCPU->cr[crBase + CR_BIT_EQ] = 1;
	if (hCPU->spr.XER & XER_SO)
		hCPU->cr[crBase + CR_BIT_SO] = 1;
	PPCInterpreter_nextInstruction(hCPU);
}
// Unsigned (logical) compare of gpr[rA] with the immediate decoded by PPC_OPC_TEMPL_D_UImm.
// The four hCPU->cr[] entries of the selected field (index = decoded value >> 2) are cleared, then exactly one of
// LT/GT/EQ is set, and SO is set when XER_SO is set in XER.
static void PPCInterpreter_CMPLI(PPCInterpreter_t* hCPU, uint32 opcode)
{
	uint32 cr;
	int rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_UImm(opcode, cr, rA, imm);
	cr >>= 2;
	const uint32 crBase = cr * 4;
	const uint32 lhs = hCPU->gpr[rA];
	const uint32 rhs = imm;
	for (uint32 bit = 0; bit < 4; bit++)
		hCPU->cr[crBase + bit] = 0;
	if (lhs < rhs)
		hCPU->cr[crBase + CR_BIT_LT] = 1;
	else if (lhs > rhs)
		hCPU->cr[crBase + CR_BIT_GT] = 1;
	else
		hCPU->cr[crBase + CR_BIT_EQ] = 1;
	if (hCPU->spr.XER & XER_SO)
		hCPU->cr[crBase + CR_BIT_SO] = 1;
	PPCInterpreter_nextInstruction(hCPU);
}

View file

@ -0,0 +1,700 @@
#include "../PPCState.h"
#include "PPCInterpreterInternal.h"
#include "PPCInterpreterHelper.h"
#include<math.h>
// floating point utility
#include <limits>
#include <array>
// Bit layout of an IEEE-754 double as used by the estimate helpers below.
const int ieee_double_e_bits = 11; // exponent bits
const int ieee_double_m_bits = 52; // mantissa bits
const int espresso_frsqrte_i_bits = 5; // index bits (the highest bit is the LSB of the exponent)
// One entry of the frsqrte lookup table. frsqrte_espresso() computes (offset - step * stepMul)
// and adds the result, shifted left by 26, to the mantissa field of the estimate.
typedef struct
{
uint32 offset;
uint32 step;
}espresso_frsqrte_entry_t;
// 32 entries, indexed by the 5-bit value frsqrte_espresso() builds from the exponent LSB
// and the four most significant mantissa bits.
espresso_frsqrte_entry_t frsqrteLookupTable[32] =
{
{0x1a7e800, 0x568},{0x17cb800, 0x4f3},{0x1552800, 0x48d},{0x130c000, 0x435},
{0x10f2000, 0x3e7},{0xeff000, 0x3a2},{0xd2e000, 0x365},{0xb7c000, 0x32e},
{0x9e5000, 0x2fc},{0x867000, 0x2d0},{0x6ff000, 0x2a8},{0x5ab800, 0x283},
{0x46a000, 0x261},{0x339800, 0x243},{0x218800, 0x226},{0x105800, 0x20b},
{0x3ffa000, 0x7a4},{0x3c29000, 0x700},{0x38aa000, 0x670},{0x3572000, 0x5f2},
{0x3279000, 0x584},{0x2fb7000, 0x524},{0x2d26000, 0x4cc},{0x2ac0000, 0x47e},
{0x2881000, 0x43a},{0x2665000, 0x3fa},{0x2468000, 0x3c2},{0x2287000, 0x38e},
{0x20c1000, 0x35e},{0x1f12000, 0x332},{0x1d79000, 0x30a},{0x1bf4000, 0x2e6},
};
// Table-driven reciprocal square root estimate, operating directly on the IEEE-754 bit pattern of the input.
// Special cases handled before the table lookup:
//   +0.0 / -0.0        -> infinity with the sign of the input
//   +INF               -> +0.0
//   -INF               -> NaN (0x7FF8000000000000)
//   NaN                -> returned unchanged
//   negative non-zero  -> NaN (0x7FF8000000000000)
// Denormal inputs are not treated specially (see todo below).
// NOTE(review): the double <-> integer conversions use pointer casts, which formally violates strict aliasing.
double frsqrte_espresso(double input)
{
unsigned long long x = *(unsigned long long*)&input;
// 0.0 and -0.0
if ((x << 1) == 0)
{
// result is inf or -inf
x &= ~0x7FFFFFFFFFFFFFFF;
x |= 0x7FF0000000000000;
return *(double*)&x;
}
// get exponent
uint32 e = (x >> ieee_double_m_bits) & ((1ull << ieee_double_e_bits) - 1ull);
// NaN or INF
if (e == 0x7FF)
{
if ((x&((1ull << ieee_double_m_bits) - 1)) == 0)
{
// negative INF returns +NaN
if ((sint64)x < 0)
{
x = 0x7FF8000000000000;
return *(double*)&x;
}
// positive INF returns +0.0
return 0.0;
}
// result is NaN with same sign and same mantissa (todo: verify)
return *(double*)&x;
}
// negative number (other than -0.0)
if ((sint64)x < 0)
{
// result is positive NaN
x = 0x7FF8000000000000;
return *(double*)&x;
}
// todo: handle denormals
// get index (lsb of exponent, remaining bits of mantissa)
// (this is bits 48..52 of x: the exponent LSB followed by the top four mantissa bits)
uint32 idx = (x >> (ieee_double_m_bits - espresso_frsqrte_i_bits + 1ull))&((1 << espresso_frsqrte_i_bits) - 1);
// get step multiplier
// (the next 11 mantissa bits below the index, i.e. bits 37..47 of x)
uint32 stepMul = (x >> (ieee_double_m_bits - espresso_frsqrte_i_bits + 1 - 11))&((1 << 11) - 1);
sint32 sum = frsqrteLookupTable[idx].offset - frsqrteLookupTable[idx].step * stepMul;
// new exponent; e is unsigned, so for e < 1021 the subtraction wraps, but only the low
// 11 bits survive the shift into the exponent field below and those are unaffected by the wrap
e = 1023 - ((e - 1021) >> 1);
// replace the exponent field, clear the mantissa, then add the table-derived mantissa estimate
x &= ~(((1ull << ieee_double_e_bits) - 1ull) << ieee_double_m_bits);
x |= ((unsigned long long)e << ieee_double_m_bits);
x &= ~((1ull << ieee_double_m_bits) - 1ull);
x += ((unsigned long long)sum << 26ull);
return *(double*)&x;
}
const int espresso_fres_i_bits = 5; // index bits
const int espresso_fres_s_bits = 10; // step multiplier bits
// One entry of the fres lookup table. fres_espresso() computes
// offset - (step * stepMul + 1) / 2 and adds the result, shifted left by 29, to the mantissa field of the estimate.
typedef struct
{
uint32 offset;
uint32 step;
}espresso_fres_entry_t;
// 32 entries, indexed by the five most significant mantissa bits of the input.
espresso_fres_entry_t fresLookupTable[32] =
{
// table calculated by fres_gen_table()
{0x7ff800, 0x3e1}, {0x783800, 0x3a7}, {0x70ea00, 0x371}, {0x6a0800, 0x340},
{0x638800, 0x313}, {0x5d6200, 0x2ea}, {0x579000, 0x2c4}, {0x520800, 0x2a0},
{0x4cc800, 0x27f}, {0x47ca00, 0x261}, {0x430800, 0x245}, {0x3e8000, 0x22a},
{0x3a2c00, 0x212}, {0x360800, 0x1fb}, {0x321400, 0x1e5}, {0x2e4a00, 0x1d1},
{0x2aa800, 0x1be}, {0x272c00, 0x1ac}, {0x23d600, 0x19b}, {0x209e00, 0x18b},
{0x1d8800, 0x17c}, {0x1a9000, 0x16e}, {0x17ae00, 0x15b}, {0x14f800, 0x15b},
{0x124400, 0x143}, {0xfbe00, 0x143}, {0xd3800, 0x12d}, {0xade00, 0x12d},
{0x88400, 0x11a}, {0x65000, 0x11a}, {0x41c00, 0x108}, {0x20c00, 0x106}
};
// Table-driven reciprocal estimate, operating directly on the IEEE-754 bit pattern of the input.
// Special cases:
//   exponent field == 0 (zero or denormal) -> exponent field forced to all ones, sign and mantissa kept
//   +INF -> +0.0, -INF -> -0.0
//   NaN  -> returned unchanged
// NOTE(review): the double <-> integer conversions use pointer casts, which formally violates strict aliasing.
double fres_espresso(double input)
{
// based on testing we know that fres uses only the first 15 bits of the mantissa
// seee eeee eeee mmmm mmmm mmmm mmmx xxxx .... (s = sign, e = exponent, m = mantissa, x = not used)
// the mantissa bits are interpreted as following:
// 0000 0000 0000 iiii ifff ffff fff0 ... (i = table look up index , f = step multiplier)
unsigned long long x = *(unsigned long long*)&input;
// get index
uint32 idx = (x >> (ieee_double_m_bits - espresso_fres_i_bits))&((1 << espresso_fres_i_bits) - 1);
// get step multiplier
uint32 stepMul = (x >> (ieee_double_m_bits - espresso_fres_i_bits - 10))&((1 << 10) - 1);
// mantissa estimate: table offset minus half of (step * multiplier), rounded up
uint32 sum = fresLookupTable[idx].offset - (fresLookupTable[idx].step * stepMul + 1) / 2;
// get exponent
uint32 e = (x >> ieee_double_m_bits) & ((1ull << ieee_double_e_bits) - 1ull);
if (e == 0)
{
// NOTE(review): for +/-0.0 this yields +/-INF; for a denormal input the non-zero mantissa bits
// are kept, so the returned bit pattern is a NaN - confirm whether that is intended
// todo?
//x &= 0x7FFFFFFFFFFFFFFFull;
x |= 0x7FF0000000000000ull;
return *(double*)&x;
}
else if (e == 0x7ff) // NaN or INF
{
if ((x&((1ull << ieee_double_m_bits) - 1)) == 0)
{
// negative INF returns -0.0
if ((sint64)x < 0)
{
x = 0x8000000000000000;
return *(double*)&x;
}
// positive INF returns +0.0
return 0.0;
}
// result is NaN with same sign and same mantissa (todo: verify)
return *(double*)&x;
}
// todo - needs more testing (especially NaN and INF values)
// NOTE(review): for e == 0x7FE the unsigned subtraction below wraps to 0xFFFFFFFF, which after the
// shift sets the whole exponent field and the sign bit - confirm the intended result for such inputs
e = 2045 - e;
// replace the exponent field, clear the mantissa, then add the table-derived mantissa estimate
x &= ~(((1ull << ieee_double_e_bits) - 1ull) << ieee_double_m_bits);
x |= ((unsigned long long)e << ieee_double_m_bits);
x &= ~((1ull << ieee_double_m_bits) - 1ull);
x += ((unsigned long long)sum << 29ull);
return *(double*)&x;
}
// Unordered floating point compare of a and b.
// Clears the four CR bits starting at bit index crfD, then sets exactly one of SO (either operand is NaN),
// LT, GT or EQ. The matching 4-bit code (1/8/4/2) is stored in bits 12..15 of fpscr.
// FPSCR_VXSNAN is additionally set when either operand is a signaling NaN.
void fcmpu_espresso(PPCInterpreter_t* hCPU, int crfD, double a, double b)
{
	for (int bit = 0; bit < 4; bit++)
		ppc_setCRBit(hCPU, crfD + bit, 0);
	uint32 c;
	int resultBit;
	if (IS_NAN(*(uint64*)&a) || IS_NAN(*(uint64*)&b))
	{
		c = 1;
		resultBit = CR_BIT_SO;
	}
	else if (a < b)
	{
		c = 8;
		resultBit = CR_BIT_LT;
	}
	else if (a > b)
	{
		c = 4;
		resultBit = CR_BIT_GT;
	}
	else
	{
		c = 2;
		resultBit = CR_BIT_EQ;
	}
	ppc_setCRBit(hCPU, crfD + resultBit, 1);
	if (IS_SNAN(*(uint64*)&a) || IS_SNAN(*(uint64*)&b))
		hCPU->fpscr |= FPSCR_VXSNAN;
	// replace bits 12..15 of fpscr with the comparison code
	hCPU->fpscr = (hCPU->fpscr & 0xffff0fff) | (c << 12);
}
// Floating move register: copies the .fpr value of fpr[frB] into fpr[frD].
void PPCInterpreter_FMR(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, rA, frB;
	PPC_OPC_TEMPL_X(Opcode, frD, rA, frB);
	PPC_ASSERT(rA==0);
	const auto& source = hCPU->fpr[frB];
	hCPU->fpr[frD].fpr = source.fpr;
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating select: fpr[frD] = (fpr[frA].fp0 >= -0.0) ? fpr[frC] : fpr[frB].
// The whole register entry is copied. A NaN in frA fails the comparison and therefore selects frB.
void PPCInterpreter_FSEL(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	const int selected = (hCPU->fpr[frA].fp0 >= -0.0f) ? frC : frB;
	hCPU->fpr[frD] = hCPU->fpr[selected];
	// NOTE(review): this asserts that the Rc bit IS set, which reads inverted for an
	// "Rc handling not implemented" guard - confirm against the definition of PPC_ASSERT
	PPC_ASSERT((Opcode & PPC_OPC_RC) != 0); // update CR1 flags
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating convert to integer word with round toward zero.
// The low 32 bits of fpr[frD].guint receive the truncated value of fpr[frB].fpr, saturated to
// 0x7FFFFFFF / 0x80000000 when out of range; the upper bits are filled with 0xFFF80000.
// A NaN input converts to 0x80000000.
// When the integer result is 0 and the input was negative, bit 32 of the result is additionally set.
void PPCInterpreter_FCTIWZ(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB;
	PPC_OPC_TEMPL_X(Opcode, frD, frA, frB);
	PPC_ASSERT(frA==0);
	double b = hCPU->fpr[frB].fpr;
	uint64 v;
	if (IS_NAN(hCPU->fpr[frB].guint))
	{
		// NaN fails both range checks below, and casting NaN to an integer is undefined behavior
		// in C++ (hosts disagree on the result), so produce the architected value explicitly
		v = (uint64)0x80000000;
	}
	else if (b > (double)0x7FFFFFFF)
	{
		v = (uint64)0x7FFFFFFF;
	}
	else if (b < -(double)0x80000000)
	{
		v = (uint64)0x80000000;
	}
	else
	{
		v = (uint64)(uint32)(sint32)b;
	}
	hCPU->fpr[frD].guint = 0xFFF8000000000000ULL | v;
	// negative input that truncated to zero: flag it in bit 32
	if (v == 0 && ((*(uint64*)&b) >> 63))
		hCPU->fpr[frD].guint |= 0x100000000ull;
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating convert to integer word using round-to-nearest (ties to even).
// The low 32 bits of fpr[frD].guint receive the rounded value of fpr[frB].fpr, saturated to
// 0x7FFFFFFF / 0x80000000 when out of range; the upper bits are filled with 0xFFF80000.
// A NaN input converts to 0x80000000.
// When the integer result is 0 and the input was negative, bit 32 of the result is additionally set.
void PPCInterpreter_FCTIW(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB;
	PPC_OPC_TEMPL_X(Opcode, frD, frA, frB);
	PPC_ASSERT(frA==0);
	double b = hCPU->fpr[frB].fpr;
	uint64 v;
	if (IS_NAN(hCPU->fpr[frB].guint))
	{
		// NaN fails both range checks below, and casting NaN to an integer is undefined behavior
		// in C++ (hosts disagree on the result), so produce the architected value explicitly
		v = (uint64)0x80000000;
	}
	else if (b > (double)0x7FFFFFFF)
	{
		v = (uint64)0x7FFFFFFF;
	}
	else if (b < -(double)0x80000000)
	{
		v = (uint64)0x80000000;
	}
	else
	{
		// todo: Support for other rounding modes than NEAR
		// Round to nearest, ties to even. b lies within [-2^31, 2^31 - 1] here, so floor(b) and the
		// fractional part are exact and the cast cannot overflow.
		// (The previous "b + 0.5, then adjust" scheme rounded e.g. 1.5 to 1 and -1.5 to -1.)
		const double lower = floor(b);
		const double fraction = b - lower;
		sint32 i = (sint32)lower;
		if (fraction > 0.5 || (fraction == 0.5 && (i & 1) != 0))
			i++;
		// go through uint32 so that negative results do not sign-extend into the upper word
		// (keeps the upper bits identical to what FCTIWZ produces)
		v = (uint64)(uint32)i;
	}
	hCPU->fpr[frD].guint = 0xFFF8000000000000ULL | v;
	// negative input that rounded to zero: flag it in bit 32
	if (v == 0 && ((*(uint64*)&b) >> 63))
		hCPU->fpr[frD].guint |= 0x100000000ull;
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating negate: fpr[frD].guint = fpr[frB].guint with bit 63 (the sign bit) toggled.
void PPCInterpreter_FNEG(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB;
	PPC_OPC_TEMPL_X(Opcode, frD, frA, frB);
	PPC_ASSERT(frA==0);
	const uint64 signBitMask = 0x8000000000000000ull;
	hCPU->fpr[frD].guint = hCPU->fpr[frB].guint ^ signBitMask;
	// NOTE(review): this asserts that the Rc bit IS set, which reads inverted for an
	// "Rc handling not implemented" guard - confirm against the definition of PPC_ASSERT
	PPC_ASSERT((Opcode & PPC_OPC_RC) != 0); // update CR1 flags
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating round to single precision: the .fpr value of fpr[frB] is converted to float.
// With PPC_PSE the result is written to fp0 and mirrored into fp1, otherwise it is written to .fpr.
void PPCInterpreter_FRSP(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB;
	PPC_OPC_TEMPL_X(Opcode, frD, frA, frB);
	PPC_ASSERT(frA==0);
	const float rounded = (float)hCPU->fpr[frB].fpr;
	if (PPC_PSE)
	{
		hCPU->fpr[frD].fp0 = rounded;
		hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0;
	}
	else
	{
		hCPU->fpr[frD].fpr = rounded;
	}
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating reciprocal square root estimate: fpr[frD].fpr = frsqrte_espresso(fpr[frB].fpr).
void PPCInterpreter_FRSQRTE(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	PPC_ASSERT(frA==0 && frC==0);
	const double estimate = frsqrte_espresso(hCPU->fpr[frB].fpr);
	hCPU->fpr[frD].fpr = estimate;
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating reciprocal estimate: fpr[frD].fpr = fres_espresso(fpr[frB].fpr).
// With PPC_PSE the result in fp0 is mirrored into fp1.
void PPCInterpreter_FRES(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	PPC_ASSERT(frA==0 && frC==0);
	const double estimate = fres_espresso(hCPU->fpr[frB].fpr);
	hCPU->fpr[frD].fpr = estimate;
	if (PPC_PSE)
		hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0;
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating point ALU
// Floating absolute value: fpr[frD].guint = fpr[frB].guint with bit 63 (the sign bit) cleared.
void PPCInterpreter_FABS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB;
	PPC_OPC_TEMPL_X(Opcode, frD, frA, frB);
	PPC_ASSERT(frA==0);
	const uint64 magnitudeMask = 0x7FFFFFFFFFFFFFFFull;
	hCPU->fpr[frD].guint = hCPU->fpr[frB].guint & magnitudeMask;
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating negative absolute value: fpr[frD].guint = fpr[frB].guint with bit 63 (the sign bit) set.
void PPCInterpreter_FNABS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB;
	PPC_OPC_TEMPL_X(Opcode, frD, frA, frB);
	PPC_ASSERT(frA==0);
	const uint64 signBitMask = 0x8000000000000000ull;
	hCPU->fpr[frD].guint = hCPU->fpr[frB].guint | signBitMask;
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating add (double): fpr[frD].fpr = fpr[frA].fpr + fpr[frB].fpr.
void PPCInterpreter_FADD(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	PPC_ASSERT(frC==0);
	const auto lhs = hCPU->fpr[frA].fpr;
	const auto rhs = hCPU->fpr[frB].fpr;
	hCPU->fpr[frD].fpr = lhs + rhs;
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating divide (double): fpr[frD].fpr = fpr[frA].fpr / fpr[frB].fpr.
void PPCInterpreter_FDIV(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	PPC_ASSERT(frC==0);
	const auto dividend = hCPU->fpr[frA].fpr;
	const auto divisor = hCPU->fpr[frB].fpr;
	hCPU->fpr[frD].fpr = dividend / divisor;
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating subtract (double): fpr[frD].fpr = fpr[frA].fpr - fpr[frB].fpr.
void PPCInterpreter_FSUB(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	PPC_ASSERT(frC==0);
	const auto minuend = hCPU->fpr[frA].fpr;
	const auto subtrahend = hCPU->fpr[frB].fpr;
	hCPU->fpr[frD].fpr = minuend - subtrahend;
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating multiply (double): fpr[frD].fpr = fpr[frA].fpr * fpr[frC].fpr.
void PPCInterpreter_FMUL(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	// frC is the multiplicand of this instruction; the field that is not used (and therefore expected
	// to be zero) is frB. The assertion previously checked frC, which is non-zero for almost every valid fmul.
	PPC_ASSERT(frB == 0);
	hCPU->fpr[frD].fpr = hCPU->fpr[frA].fpr * hCPU->fpr[frC].fpr;
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating multiply-add (double): fpr[frD].fpr = fpr[frA].fpr * fpr[frC].fpr + fpr[frB].fpr.
void PPCInterpreter_FMADD(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	const auto factorA = hCPU->fpr[frA].fpr;
	const auto factorC = hCPU->fpr[frC].fpr;
	const auto addend = hCPU->fpr[frB].fpr;
	hCPU->fpr[frD].fpr = factorA * factorC + addend;
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating negative multiply-add (double): fpr[frD].fpr = -(fpr[frA].fpr * fpr[frC].fpr + fpr[frB].fpr).
void PPCInterpreter_FNMADD(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	const auto factorA = hCPU->fpr[frA].fpr;
	const auto factorC = hCPU->fpr[frC].fpr;
	const auto addend = hCPU->fpr[frB].fpr;
	hCPU->fpr[frD].fpr = -(factorA * factorC + addend);
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating multiply-subtract (double): fpr[frD].fpr = fpr[frA].fpr * fpr[frC].fpr - fpr[frB].fpr.
void PPCInterpreter_FMSUB(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	const auto factorA = hCPU->fpr[frA].fpr;
	const auto factorC = hCPU->fpr[frC].fpr;
	const auto subtrahend = hCPU->fpr[frB].fpr;
	hCPU->fpr[frD].fpr = (factorA * factorC - subtrahend);
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating negative multiply-subtract (double): fpr[frD].fpr = -(fpr[frA].fpr * fpr[frC].fpr - fpr[frB].fpr).
void PPCInterpreter_FNMSUB(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	const auto factorA = hCPU->fpr[frA].fpr;
	const auto factorC = hCPU->fpr[frC].fpr;
	const auto subtrahend = hCPU->fpr[frB].fpr;
	hCPU->fpr[frD].fpr = -(factorA * factorC - subtrahend);
	PPCInterpreter_nextInstruction(hCPU);
}
// Move
// Move from FPSCR: fpr[frD].guint = fpscr, zero-extended to 64 bits.
void PPCInterpreter_MFFS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, rA, rB;
	PPC_OPC_TEMPL_X(Opcode, frD, rA, rB);
	PPC_ASSERT(rA==0 && rB==0);
	const uint64 statusBits = (uint64)hCPU->fpscr;
	hCPU->fpr[frD].guint = statusBits;
	PPCInterpreter_nextInstruction(hCPU);
}
// Move to FPSCR fields: each set bit i of the 8-bit field mask fm selects nibble i of fpscr
// (bit 0 -> bits 0..3, ... bit 7 -> bits 28..31); selected nibbles are replaced with the corresponding
// bits of fpr[frB].guint, all others are kept. The first execution logs a one-time
// "Unsupported write to FPSCR" message.
void PPCInterpreter_MTFSF(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frB;
	uint32 fm, FM;
	PPC_OPC_TEMPL_XFL(Opcode, frB, fm);
	// expand the 8-bit field mask into a 32-bit nibble mask
	FM = 0;
	for (uint32 field = 0; field < 8; field++)
	{
		if (fm & (1u << field))
			FM |= 0xFu << (field * 4);
	}
	hCPU->fpscr = (hCPU->fpr[frB].guint & FM) | (hCPU->fpscr & ~FM);
	// NOTE(review): this asserts that the Rc bit IS set, which reads inverted for an
	// "Rc handling not implemented" guard - confirm against the definition of PPC_ASSERT
	PPC_ASSERT((Opcode & PPC_OPC_RC) != 0); // update CR1 flags
	static bool logFPSCRWriteOnce = false;
	if (!logFPSCRWriteOnce)
	{
		forceLog_printf("Unsupported write to FPSCR\n");
		logFPSCRWriteOnce = true;
	}
	PPCInterpreter_nextInstruction(hCPU);
}
// single precision
// Floating add single: fpr[frD].fpr = (float)(fpr[frA].fpr + fpr[frB].fpr).
// With PPC_PSE the result in fp0 is mirrored into fp1.
void PPCInterpreter_FADDS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	// frB is an operand of this instruction; the unused field (expected to be zero) is frC,
	// matching the assertion in the double-precision PPCInterpreter_FADD
	PPC_ASSERT(frC == 0);
	// todo: check for RC
	hCPU->fpr[frD].fpr = (float)(hCPU->fpr[frA].fpr + hCPU->fpr[frB].fpr);
	if (PPC_PSE)
		hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0;
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating subtract single: fpr[frD].fpr = (float)(fpr[frA].fpr - fpr[frB].fpr).
// With PPC_PSE the result in fp0 is mirrored into fp1.
void PPCInterpreter_FSUBS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	// frB is an operand of this instruction; the unused field (expected to be zero) is frC,
	// matching the assertion in the double-precision PPCInterpreter_FSUB
	PPC_ASSERT(frC == 0);
	hCPU->fpr[frD].fpr = (float)(hCPU->fpr[frA].fpr - hCPU->fpr[frB].fpr);
	if (PPC_PSE)
		hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0;
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating divide single: fpr[frD].fpr = (float)(fpr[frA].fpr / fpr[frB].fpr).
// With PPC_PSE the result in fp0 is mirrored into fp1.
void PPCInterpreter_FDIVS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	// frB is the divisor of this instruction; the unused field (expected to be zero) is frC,
	// matching the assertion in the double-precision PPCInterpreter_FDIV
	PPC_ASSERT(frC == 0);
	hCPU->fpr[frD].fpr = (float)(hCPU->fpr[frA].fpr / hCPU->fpr[frB].fpr);
	if (PPC_PSE)
		hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0;
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating multiply single: fpr[frD].fpr = (float)(fpr[frA].fpr * roundTo25BitAccuracy(fpr[frC].fpr)).
// With PPC_PSE the result in fp0 is mirrored into fp1.
void PPCInterpreter_FMULS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	PPC_ASSERT(frB == 0);
	const auto multiplicand = hCPU->fpr[frA].fpr;
	const auto multiplier = roundTo25BitAccuracy(hCPU->fpr[frC].fpr);
	hCPU->fpr[frD].fpr = (float)(multiplicand * multiplier);
	if (PPC_PSE)
		hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0;
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating multiply-add single:
// fpr[frD].fpr = (float)(fpr[frA].fpr * roundTo25BitAccuracy(fpr[frC].fpr) + fpr[frB].fpr).
// With PPC_PSE the result in fp0 is mirrored into fp1.
void PPCInterpreter_FMADDS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	const auto multiplicand = hCPU->fpr[frA].fpr;
	const auto multiplier = roundTo25BitAccuracy(hCPU->fpr[frC].fpr);
	const auto addend = hCPU->fpr[frB].fpr;
	hCPU->fpr[frD].fpr = (float)(multiplicand * multiplier + addend);
	if (PPC_PSE)
		hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0;
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating negative multiply-add single:
// fpr[frD].fpr = (float)-(fpr[frA].fpr * roundTo25BitAccuracy(fpr[frC].fpr) + fpr[frB].fpr).
// With PPC_PSE the result in fp0 is mirrored into fp1.
void PPCInterpreter_FNMADDS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	const auto multiplicand = hCPU->fpr[frA].fpr;
	const auto multiplier = roundTo25BitAccuracy(hCPU->fpr[frC].fpr);
	const auto addend = hCPU->fpr[frB].fpr;
	hCPU->fpr[frD].fpr = (float)-(multiplicand * multiplier + addend);
	if (PPC_PSE)
		hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0;
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating multiply-subtract single, operating on the fp0 members:
// fpr[frD].fp0 = (float)(fpr[frA].fp0 * roundTo25BitAccuracy(fpr[frC].fp0) - fpr[frB].fp0).
// With PPC_PSE the result in fp0 is mirrored into fp1.
void PPCInterpreter_FMSUBS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	const auto multiplicand = hCPU->fpr[frA].fp0;
	const auto multiplier = roundTo25BitAccuracy(hCPU->fpr[frC].fp0);
	const auto subtrahend = hCPU->fpr[frB].fp0;
	hCPU->fpr[frD].fp0 = (float)(multiplicand * multiplier - subtrahend);
	if (PPC_PSE)
		hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0;
	PPCInterpreter_nextInstruction(hCPU);
}
// Floating negative multiply-subtract single, operating on the fp0 members:
// fpr[frD].fp0 = (float)-(fpr[frA].fp0 * roundTo25BitAccuracy(fpr[frC].fp0) - fpr[frB].fp0).
// With PPC_PSE the result in fp0 is mirrored into fp1.
void PPCInterpreter_FNMSUBS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	const auto multiplicand = hCPU->fpr[frA].fp0;
	const auto multiplier = roundTo25BitAccuracy(hCPU->fpr[frC].fp0);
	const auto subtrahend = hCPU->fpr[frB].fp0;
	hCPU->fpr[frD].fp0 = (float)-(multiplicand * multiplier - subtrahend);
	if (PPC_PSE)
		hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0;
	PPCInterpreter_nextInstruction(hCPU);
}
// Compare
// Ordered floating point compare of fpr[frA].fpr with fpr[frB].fpr.
// The four hCPU->cr[] entries of the selected field (index = decoded value >> 2) are cleared, then exactly one of
// SO (either operand is NaN), LT, GT or EQ is set. The matching 4-bit code (1/8/4/2) is stored in
// bits 12..15 of fpscr.
// Invalid-operation flags:
//   - any signaling NaN operand: FPSCR_VXSNAN, plus FPSCR_VXVC when FPSCR_VE is clear
//   - otherwise any quiet NaN operand: FPSCR_VXVC
//   - ordered operands: no exception flag is raised
void PPCInterpreter_FCMPO(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int crfD, frA, frB;
	PPC_OPC_TEMPL_X(Opcode, crfD, frA, frB);
	crfD >>= 2;
	hCPU->cr[crfD*4+0] = 0;
	hCPU->cr[crfD*4+1] = 0;
	hCPU->cr[crfD*4+2] = 0;
	hCPU->cr[crfD*4+3] = 0;
	uint32 c;
	if(IS_NAN(hCPU->fpr[frA].guint) || IS_NAN(hCPU->fpr[frB].guint))
	{
		c = 1;
		hCPU->cr[crfD*4+CR_BIT_SO] = 1;
	}
	else if(hCPU->fpr[frA].fpr < hCPU->fpr[frB].fpr)
	{
		c = 8;
		hCPU->cr[crfD*4+CR_BIT_LT] = 1;
	}
	else if(hCPU->fpr[frA].fpr > hCPU->fpr[frB].fpr)
	{
		c = 4;
		hCPU->cr[crfD*4+CR_BIT_GT] = 1;
	}
	else
	{
		c = 2;
		hCPU->cr[crfD*4+CR_BIT_EQ] = 1;
	}
	hCPU->fpscr = (hCPU->fpscr & 0xffff0fff) | (c << 12);
	if (IS_SNAN (hCPU->fpr[frA].guint) || IS_SNAN (hCPU->fpr[frB].guint))
	{
		hCPU->fpscr |= FPSCR_VXSNAN;
		// VXVC accompanies VXSNAN only while the invalid-operation exception is disabled
		if (!(hCPU->fpscr & FPSCR_VE))
			hCPU->fpscr |= FPSCR_VXVC;
	}
	else if (IS_QNAN (hCPU->fpr[frA].guint) || IS_QNAN (hCPU->fpr[frB].guint))
	{
		// The previous condition was "!VE || QNaN", which raised VXVC for every ordinary
		// (non-NaN) comparison whenever VE was clear
		hCPU->fpscr |= FPSCR_VXVC;
	}
	PPCInterpreter_nextInstruction(hCPU);
}
// Unordered floating point compare: forwards the fp0 values of fpr[frA] and fpr[frB] to fcmpu_espresso().
// The decoded crfD value is used directly as the first CR bit index and must be a multiple of 4.
void PPCInterpreter_FCMPU(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int crfD, frA, frB;
	PPC_OPC_TEMPL_X(Opcode, crfD, frA, frB);
	cemu_assert_debug((crfD % 4) == 0);
	const auto lhs = hCPU->fpr[frA].fp0;
	const auto rhs = hCPU->fpr[frB].fp0;
	fcmpu_espresso(hCPU, crfD, lhs, rhs);
	PPCInterpreter_nextInstruction(hCPU);
}

View file

@ -0,0 +1,64 @@
#include "../PPCState.h"
#include "PPCInterpreterInternal.h"
#include "PPCInterpreterHelper.h"
std::unordered_set<std::string> sUnsupportedHLECalls;
// Handler for calls into library functions that have no HLE implementation.
// The function name is read from guest memory 8 bytes past the current instruction,
// logged once per distinct name, and the call is completed with r3 = 0.
void PPCInterpreter_handleUnsupportedHLECall(PPCInterpreter_t* hCPU)
{
	const char* libFuncName = (char*)memory_getPointerFromVirtualOffset(hCPU->instructionPointer + 8);
	std::string message = fmt::format("Unsupported lib call: {}", libFuncName);
	const bool alreadyReported = sUnsupportedHLECalls.count(message) != 0;
	if (!alreadyReported)
	{
		cemuLog_log(LogType::UnsupportedAPI, "{}", message);
		sUnsupportedHLECalls.emplace(message);
	}
	hCPU->gpr[3] = 0;
	PPCInterpreter_nextInstruction(hCPU);
}
std::vector<void(*)(PPCInterpreter_t* hCPU)>* sPPCHLETable{};
// Registers an HLE handler and returns its index in the global table.
// The table is created on first use; registering the same handler again returns the
// index it already has instead of adding a duplicate.
HLEIDX PPCInterpreter_registerHLECall(HLECALL hleCall)
{
	if (sPPCHLETable == nullptr)
		sPPCHLETable = new std::vector<void(*)(PPCInterpreter_t* hCPU)>();
	auto& table = *sPPCHLETable;
	for (size_t idx = 0; idx < table.size(); ++idx)
	{
		if (table[idx] == hleCall)
			return (sint32)idx;
	}
	HLEIDX newFuncIndex = (sint32)table.size();
	table.push_back(hleCall);
	return newFuncIndex;
}
// Looks up a previously registered HLE handler by index.
// Returns nullptr when the index is out of range or when no handler has been
// registered yet (the table is only allocated by the first registration).
HLECALL PPCInterpreter_getHLECall(HLEIDX funcIndex)
{
	if (sPPCHLETable == nullptr)
		return nullptr;
	if (funcIndex < 0 || (size_t)funcIndex >= sPPCHLETable->size())
		return nullptr;
	return (*sPPCHLETable)[funcIndex];
}
std::mutex g_hleLogMutex;
// Dispatches a virtual HLE opcode. The low 16 bits of the opcode select the handler:
// 0xFFD0 is reserved for "unsupported library call", every other value is an index
// into the table filled by PPCInterpreter_registerHLECall.
void PPCInterpreter_virtualHLE(PPCInterpreter_t* hCPU, unsigned int opcode)
{
	const uint32 hleFuncId = opcode & 0xFFFF;
	if (hleFuncId == 0xFFD0)
	{
		// scoped guard: the mutex is released even if formatting or logging throws
		std::lock_guard<std::mutex> logLock(g_hleLogMutex);
		PPCInterpreter_handleUnsupportedHLECall(hCPU);
		return;
	}
	// os lib function
	cemu_assert(sPPCHLETable != nullptr); // no handler was ever registered
	cemu_assert(hleFuncId < sPPCHLETable->size());
	auto hleCall = (*sPPCHLETable)[hleFuncId];
	cemu_assert(hleCall);
	hleCall(hCPU);
}

View file

@ -0,0 +1,189 @@
// AND-masks that each clear one 4-bit nibble of a 32-bit word and keep the rest.
// Entry i clears bits (4*i)..(4*i+3), counted from the least significant bit.
// NOTE(review): presumably used to clear one CR field of a packed CR word before a
// compare result is merged in - no user is visible in this part of the file.
static uint32 ppc_cmp_and_mask[8] = {
0xfffffff0,
0xffffff0f,
0xfffff0ff,
0xffff0fff,
0xfff0ffff,
0xff0fffff,
0xf0ffffff,
0x0fffffff,
};
#define ppc_word_rotl(_data, _n) (_rotl(_data,(_n)&0x1F))
// Builds a rotate-instruction style bit mask. Bits are numbered from the most
// significant bit (bit 0) to the least significant (bit 31).
// MB <= ME: ones from bit MB through bit ME.
// MB >  ME: the mask wraps around - ones from bit 0 through ME and from MB through 31.
static inline uint32 ppc_mask(int MB, int ME)
{
	const uint32 fromMB = 0xFFFFFFFF >> MB;       // ones from bit MB down to bit 31
	const uint32 upToME = 0xFFFFFFFF << (31 - ME); // ones from bit 0 down to bit ME
	if (MB <= ME)
		return fromMB & upToME;
	return fromMB | upToME;
}
// Returns true when the 32-bit sum a + b + c produces a carry out of bit 31,
// i.e. when either of the two successive additions wraps around.
static inline bool ppc_carry_3(uint32 a, uint32 b, uint32 c)
{
	const uint32 partialSum = a + b;
	const uint32 fullSum = partialSum + c;
	const bool firstAddWrapped = partialSum < a;
	const bool secondAddWrapped = fullSum < c;
	return firstAddWrapped || secondAddWrapped;
}
// Extracts a __bitCount wide field from a 32-bit value. __index uses MSB-first numbering
// (0 = most significant bit) and names the LAST (least significant) bit of the field.
// NOTE(review): the macro arguments are not parenthesized, so pass only simple
// expressions (a variable or literal) to avoid operator-precedence surprises.
#define PPC_getBits(__value, __index, __bitCount) ((__value>>(31-__index))&((1<<__bitCount)-1))
// Dequantization (load) scale factors, indexed by a 6-bit scale value.
// Entries 0..31 are 2^-index, entries 32..63 are 2^(64-index), which matches treating
// the index as a signed 6-bit number s and scaling by 2^-s.
// The values are written as hexadecimal float literals so every entry is an exact
// power of two. The previous decimal literals were rounded to six digits, which made
// entries such as 2^-7 inexact and turned entries 21..31 into 0.0.
const static float LD_SCALE[] = {
	0x1p0f,   0x1p-1f,  0x1p-2f,  0x1p-3f,  0x1p-4f,  0x1p-5f,  0x1p-6f,  0x1p-7f,
	0x1p-8f,  0x1p-9f,  0x1p-10f, 0x1p-11f, 0x1p-12f, 0x1p-13f, 0x1p-14f, 0x1p-15f,
	0x1p-16f, 0x1p-17f, 0x1p-18f, 0x1p-19f, 0x1p-20f, 0x1p-21f, 0x1p-22f, 0x1p-23f,
	0x1p-24f, 0x1p-25f, 0x1p-26f, 0x1p-27f, 0x1p-28f, 0x1p-29f, 0x1p-30f, 0x1p-31f,
	0x1p32f,  0x1p31f,  0x1p30f,  0x1p29f,  0x1p28f,  0x1p27f,  0x1p26f,  0x1p25f,
	0x1p24f,  0x1p23f,  0x1p22f,  0x1p21f,  0x1p20f,  0x1p19f,  0x1p18f,  0x1p17f,
	0x1p16f,  0x1p15f,  0x1p14f,  0x1p13f,  0x1p12f,  0x1p11f,  0x1p10f,  0x1p9f,
	0x1p8f,   0x1p7f,   0x1p6f,   0x1p5f,   0x1p4f,   0x1p3f,   0x1p2f,   0x1p1f };
// Quantization (store) scale factors, indexed by a 6-bit scale value.
// Entries 0..31 are 2^index, entries 32..63 are 2^(index-64), which matches treating
// the index as a signed 6-bit number s and scaling by 2^s.
// The values are written as hexadecimal float literals so every entry is an exact
// power of two. The previous decimal literals were rounded to six digits, which made
// small factors inexact and turned entries 32..43 into 0.0.
const static float ST_SCALE[] = {
	0x1p0f,   0x1p1f,   0x1p2f,   0x1p3f,
	0x1p4f,   0x1p5f,   0x1p6f,   0x1p7f,
	0x1p8f,   0x1p9f,   0x1p10f,  0x1p11f,
	0x1p12f,  0x1p13f,  0x1p14f,  0x1p15f,
	0x1p16f,  0x1p17f,  0x1p18f,  0x1p19f,
	0x1p20f,  0x1p21f,  0x1p22f,  0x1p23f,
	0x1p24f,  0x1p25f,  0x1p26f,  0x1p27f,
	0x1p28f,  0x1p29f,  0x1p30f,  0x1p31f,
	0x1p-32f, 0x1p-31f, 0x1p-30f, 0x1p-29f,
	0x1p-28f, 0x1p-27f, 0x1p-26f, 0x1p-25f,
	0x1p-24f, 0x1p-23f, 0x1p-22f, 0x1p-21f,
	0x1p-20f, 0x1p-19f, 0x1p-18f, 0x1p-17f,
	0x1p-16f, 0x1p-15f, 0x1p-14f, 0x1p-13f,
	0x1p-12f, 0x1p-11f, 0x1p-10f, 0x1p-9f,
	0x1p-8f,  0x1p-7f,  0x1p-6f,  0x1p-5f,
	0x1p-4f,  0x1p-3f,  0x1p-2f,  0x1p-1f };
// Converts a loaded raw value into a float according to the quantization type:
//   4 = u8, 5 = u16, 6 = s8, 7 = s16 -> integer converted to float, then multiplied by LD_SCALE[scale]
//   0 and every other type           -> the 32 bits are reinterpreted as a float, scale is ignored
// scale is used directly as an index into LD_SCALE (64 entries).
static float dequantize(uint32 data, sint32 type, uint8 scale)
{
	float unscaled;
	switch (type)
	{
	case 4: // u8
		unscaled = (float)(uint8)data;
		break;
	case 5: // u16
		unscaled = (float)(uint16)data;
		break;
	case 6: // s8
		unscaled = (float)(sint8)data;
		break;
	case 7: // s16
		unscaled = (float)(sint16)data;
		break;
	case 0: // float
	default:
		// scale does not apply when loading floats
		return *((float *)&data);
	}
	return unscaled * LD_SCALE[scale];
}
// Converts a float into the raw value to store according to the quantization type:
//   4 = u8, 5 = u16, 6 = s8, 7 = s16 -> multiplied by ST_SCALE[scale], clamped to the
//                                       target range, truncated toward zero
//   0 and every other type           -> the float's 32 bits are returned unchanged, scale is ignored
// For the signed types the result is sign-extended to 32 bits (e.g. -128 -> 0xFFFFFF80).
// scale is used directly as an index into ST_SCALE (64 entries).
static uint32 quantize(float data, sint32 type, uint8 scale)
{
	uint32 val;
	switch (type)
	{
	case 4: // u8
		data *= ST_SCALE[scale];
		if (data < 0) data = 0;
		if (data > 255) data = 255;
		val = (uint8)(uint32)data;
		break;
	case 5: // u16
		data *= ST_SCALE[scale];
		if (data < 0) data = 0;
		if (data > 65535) data = 65535;
		val = (uint16)(uint32)data;
		break;
	case 6: // s8
		data *= ST_SCALE[scale];
		if (data < -128) data = -128;
		if (data > 127) data = 127;
		// convert through a signed integer: float -> unsigned is undefined for negative values
		val = (uint32)(sint32)(sint8)(sint32)data;
		break;
	case 7: // s16
		data *= ST_SCALE[scale];
		if (data < -32768) data = -32768;
		if (data > 32767) data = 32767;
		// convert through a signed integer: float -> unsigned is undefined for negative values
		val = (uint32)(sint32)(sint16)(sint32)data;
		break;
	case 0: // float
	default:
		// scale does not apply when storing floats
		*((float*)&val) = data;
		break;
	}
	return val;
}
// Sign-extends __v to 32 bits, treating bit number __bits (0 = least significant) as the
// sign bit: the value is shifted left until that bit is bit 31, then shifted back right
// as a signed integer.
// NOTE(review): relies on >> of a negative sint32 being an arithmetic shift.
// NOTE(review): the expansion ends in ';', so the macro can only be used as the last
// part of a statement, not inside a larger expression.
#define _uint32_fastSignExtend(__v, __bits) (uint32)(((sint32)(__v)<<(31-(__bits)))>>(31-(__bits)));
// Widens an IEEE-754 single (given as its 32-bit pattern) to the bit pattern of the
// equivalent double using integer operations only, so single-precision denormals are
// converted exactly instead of being flushed to zero.
// See http://www.freescale.com/files/product/doc/MPCFPE32B.pdf
static inline uint64 ConvertToDoubleNoFTZ(uint32 value)
{
	const uint64 bits = value;
	const uint64 exponent = (bits >> 23) & 0xff;
	uint64 mantissa = bits & 0x007fffff;

	if (exponent == 0 && mantissa != 0)
	{
		// denormal: normalize the mantissa and lower the double exponent accordingly
		uint64 doubleExponent = 1023 - 126;
		do
		{
			mantissa <<= 1;
			doubleExponent -= 1;
		} while ((mantissa & 0x00800000) == 0);
		return ((bits & 0x80000000) << 32) | (doubleExponent << 52) | ((mantissa & 0x007fffff) << 29);
	}

	// The three extra exponent bits of the double are all equal:
	//  - normal numbers: the inverse of the single's top exponent bit
	//  - zero / Inf / NaN (exponent 0 or 255): a copy of the top exponent bit
	const uint64 topExponentBit = exponent >> 7;
	const bool isNormal = (exponent > 0 && exponent < 255);
	const uint64 fillBit = isNormal ? (topExponentBit ^ 1) : topExponentBit;
	const uint64 exponentFill = (fillBit << 61) | (fillBit << 60) | (fillBit << 59);
	return ((bits & 0xc0000000) << 32) | exponentFill | ((bits & 0x3fffffff) << 29);
}
// Narrows a double (given as its 64-bit pattern) to a single-precision bit pattern
// using integer operations only. Doubles whose biased exponent lies in 874..896 are
// turned into single-precision denormals; every other input has its sign/top exponent
// bit and the following 30 bits copied over (excess mantissa bits are truncated).
static inline uint32 ConvertToSingleNoFTZ(uint64 x)
{
	const uint32 exponent = (x >> 52) & 0x7ff;
	const bool becomesDenormal = (exponent >= 874 && exponent <= 896);
	if (!becomesDenormal)
	{
		return ((x >> 32) & 0xc0000000) | ((x >> 29) & 0x3fffffff);
	}
	// make the implicit leading one explicit, then shift it into denormal position
	uint32 result = (uint32)(0x80000000 | ((x & 0x000FFFFFFFFFFFFFULL) >> 21));
	result >>= (905 - exponent);
	result |= (x >> 32) & 0x80000000; // sign
	return result;
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,300 @@
#pragma once
#include "Cafe/HW/Espresso/PPCState.h"
// SPR constants
#define SPR_XER 1
#define SPR_LR 8
#define SPR_CTR 9
#define SPR_DEC 22
#define SPR_SRR0 26
#define SPR_SRR1 27
#define SPR_HID0 1008
#define SPR_HID1 1009
#define SPR_HID2 920
#define SPR_TBL 268
#define SPR_TBU 269
#define SPR_DMAU 922
#define SPR_DMAL 923
// graphics quantization registers
#define SPR_GQR0 912
#define SPR_GQR1 913
#define SPR_GQR2 914
#define SPR_GQR3 915
#define SPR_GQR4 916
#define SPR_GQR5 917
#define SPR_GQR6 918
#define SPR_GQR7 919
// user graphics quantization registers
#define SPR_UGQR0 896
#define SPR_UGQR1 897
#define SPR_UGQR2 898
#define SPR_UGQR3 899
#define SPR_UGQR4 900
#define SPR_UGQR5 901
#define SPR_UGQR6 902
#define SPR_UGQR7 903
#define SPR_FPECR 1022 // used by the OS to store values
#define SPR_PVR 287 // processor version, for Wii U this must be 0x7001xxxx - this register is only readable
#define SPR_UPIR 1007 // core index
#define SPR_SCR 947 // core control
#define SPR_SDR1 25
// reversed CR bit indices
// Offsets of the four condition bits inside one CR field. hCPU->cr[] holds one entry
// per CR bit, so bit X of field N lives at cr[N*4 + CR_BIT_X].
#define CR_BIT_LT 0
#define CR_BIT_GT 1
#define CR_BIT_EQ 2
#define CR_BIT_SO 3
// XER bits. SO and OV are bit masks, while XER_BIT_CA is a bit INDEX (shift amount).
#define XER_SO (1<<31) // summary overflow bit
#define XER_OV (1<<30) // overflow bit
#define XER_BIT_CA (29) // carry bit index. To accelerate frequent access, this bit is stored as a separate uint8
// FPSCR
// Bit masks ORed into hCPU->fpscr by FCMPO: VXSNAN when an operand is a signaling NaN,
// VXVC for the compare-specific invalid-operation case.
#define FPSCR_VXSNAN (1<<24)
#define FPSCR_VXVC (1<<19)
// Machine state register (MSR) bit masks, tested against hCPU->sprExtended.msr.
#define MSR_SF (1<<31)
#define MSR_UNKNOWN (1<<30)
#define MSR_UNKNOWN2 (1<<27)
#define MSR_VEC (1<<25)
#define MSR_POW (1<<18)
// TGPR sits between POW and ILE (MSR bit 14 in MSB-first numbering). It was previously
// defined as (1<<15), which made it indistinguishable from MSR_EE.
#define MSR_TGPR (1<<17)
#define MSR_ILE (1<<16)
#define MSR_EE (1<<15)
#define MSR_PR (1<<14)
#define MSR_FP (1<<13)
#define MSR_ME (1<<12)
#define MSR_FE0 (1<<11)
#define MSR_SE (1<<10)
#define MSR_BE (1<<9)
#define MSR_FE1 (1<<8)
#define MSR_IP (1<<6)
#define MSR_IR (1<<5)
#define MSR_DR (1<<4)
#define MSR_PM (1<<2)
#define MSR_RI (1<<1)
#define MSR_LE (1)
// helpers
#define GET_MSR_BIT(__bit) ((hCPU->sprExtended.msr&(__bit))!=0)
#define opHasRC() ((opcode & PPC_OPC_RC) != 0)
// assume fixed values for PSE/LSQE. This optimization is possible because Wii U applications run only in user mode (todo - handle this correctly in LLE mode)
//#define PPC_LSQE (hCPU->LSQE)
//#define PPC_PSE (hCPU->PSE)
#define PPC_LSQE (1)
#define PPC_PSE (1)
#define PPC_ASSERT(v)
#define PPC_OPC_RC 1
#define PPC_OPC_OE (1<<10)
#define PPC_OPC_LK 1
#define PPC_OPC_AA (1<<1)
#define PPC_OPC_TEMPL_A(opc, rD, rA, rB, rC) {rD=((opc)>>21)&0x1f;rA=((opc)>>16)&0x1f;rB=((opc)>>11)&0x1f;rC=((opc)>>6)&0x1f;}
#define PPC_OPC_TEMPL_B(opc, BO, BI, BD) {BO=((opc)>>21)&0x1f;BI=((opc)>>16)&0x1f;BD=(uint32)(sint32)(sint16)((opc)&0xfffc);}
#define PPC_OPC_TEMPL_D_SImm(opc, rD, rA, imm) {rD=((opc)>>21)&0x1f;rA=((opc)>>16)&0x1f;imm=(uint32)(sint32)(sint16)((opc)&0xffff);}
#define PPC_OPC_TEMPL_D_UImm(opc, rD, rA, imm) {rD=((opc)>>21)&0x1f;rA=((opc)>>16)&0x1f;imm=(opc)&0xffff;}
#define PPC_OPC_TEMPL_D_Shift16(opc, rD, rA, imm) {rD=((opc)>>21)&0x1f;rA=((opc)>>16)&0x1f;imm=(opc)<<16;}
#define PPC_OPC_TEMPL_I(opc, LI) {LI=(opc)&0x3fffffc;if (LI&0x02000000) LI |= 0xfc000000;}
#define PPC_OPC_TEMPL_M(opc, rS, rA, SH, MB, ME) {rS=((opc)>>21)&0x1f;rA=((opc)>>16)&0x1f;SH=((opc)>>11)&0x1f;MB=((opc)>>6)&0x1f;ME=((opc)>>1)&0x1f;}
#define PPC_OPC_TEMPL_X(opc, rS, rA, rB) {rS=((opc)>>21)&0x1f;rA=((opc)>>16)&0x1f;rB=((opc)>>11)&0x1f;}
#define PPC_OPC_TEMPL_XFX(opc, rS, CRM) {rS=((opc)>>21)&0x1f;CRM=((opc)>>12)&0xff;}
#define PPC_OPC_TEMPL_XO(opc, rS, rA, rB) {rS=((opc)>>21)&0x1f;rA=((opc)>>16)&0x1f;rB=((opc)>>11)&0x1f;}
#define PPC_OPC_TEMPL_XL(opc, BO, BI, BD) {BO=((opc)>>21)&0x1f;BI=((opc)>>16)&0x1f;BD=((opc)>>11)&0x1f;}
#define PPC_OPC_TEMPL_XFL(opc, rB, FM) {rB=((opc)>>11)&0x1f;FM=((opc)>>17)&0xff;}
#define PPC_OPC_TEMPL3_XO() sint32 rD, rA, rB; rD=((opcode)>>21)&0x1f;rA=((opcode)>>16)&0x1f;rB=((opcode)>>11)&0x1f
#define PPC_OPC_TEMPL_X_CR() sint32 crD, crA, crB; crD=((opcode)>>21)&0x1f;crA=((opcode)>>16)&0x1f;crB=((opcode)>>11)&0x1f
// Updates CR field 0 from a 32-bit result interpreted as a signed value:
// LT when negative, EQ when zero, GT otherwise; SO is copied from XER[SO].
static inline void ppc_update_cr0(PPCInterpreter_t* hCPU, uint32 r)
{
	const bool isZero = (r == 0);
	const bool isNegative = (r & 0x80000000) != 0; // implies r != 0
	hCPU->cr[CR_BIT_SO] = (hCPU->spr.XER & XER_SO) ? 1 : 0;
	hCPU->cr[CR_BIT_LT] = isNegative ? 1 : 0;
	hCPU->cr[CR_BIT_EQ] = isZero ? 1 : 0;
	// LT and EQ are mutually exclusive, so GT is simply "neither of them"
	hCPU->cr[CR_BIT_GT] = (!isZero && !isNegative) ? 1 : 0;
}
// Reads one CR bit. r is the index into hCPU->cr[] (index 0 is cr0.LT).
static inline uint8 ppc_getCRBit(PPCInterpreter_t* hCPU, uint32 r)
{
	const uint8 bitValue = hCPU->cr[r];
	return bitValue;
}
// Tests whether the 8-bit mtcrf field mask selects CR field crIndex (0..7).
// The mask is MSB-first: 0x80 selects cr0, 0x01 selects cr7.
static inline bool ppc_MTCRFMaskHasCRFieldSet(const uint32 mtcrfMask, const uint32 crIndex)
{
	const uint32 bitPosition = 7 - crIndex;
	return ((mtcrfMask >> bitPosition) & 1) != 0;
}
// Expands an 8-bit mtcrf field mask into a 32-bit per-bit mask: every selected CR
// field contributes four set bits. The result has cr0 in the lowest nibble (CR0.LT in
// the LSB) and cr7 in the highest nibble.
static inline uint32 ppc_MTCRFMaskToCRBitMask(const uint32 mtcrfMask)
{
	uint32 bitMask = 0;
	uint32 field = 0;
	while (field < 8)
	{
		if (ppc_MTCRFMaskHasCRFieldSet(mtcrfMask, field))
		{
			const uint32 nibbleShift = field * 4;
			bitMask |= (0xF << nibbleShift);
		}
		++field;
	}
	return bitMask;
}
// Writes one CR bit. r is the index into hCPU->cr[] (index 0 is cr0.LT), v the value to store.
static inline void ppc_setCRBit(PPCInterpreter_t* hCPU, uint32 r, uint8 v)
{
	auto& crEntry = hCPU->cr[r];
	crEntry = v;
}
// Unpacks a 32-bit CR value into the per-bit CR array.
// The most significant bit of cr goes to index 0 (cr0.LT), the least significant to index 31.
static inline void ppc_setCR(PPCInterpreter_t* hCPU, uint32 cr)
{
	for (uint32 crBit = 0; crBit < 32; ++crBit)
	{
		const uint32 shift = 31 - crBit;
		ppc_setCRBit(hCPU, crBit, (cr >> shift) & 1);
	}
}
// Packs the per-bit CR array into a 32-bit value.
// Index 0 (cr0.LT) ends up in the most significant bit, index 31 in the least significant.
static inline uint32 ppc_getCR(PPCInterpreter_t* hCPU)
{
	uint32 packed = 0;
	for (uint32 crBit = 0; crBit < 32; ++crBit)
	{
		if (ppc_getCRBit(hCPU, crBit))
			packed |= (uint32)1 << (31 - crBit);
	}
	return packed;
}
// FPU helper
// NaN classification on the raw 64-bit pattern of a double (X must be an integer, not a double).
// IS_NAN:  exponent bits all ones and a non-zero fraction.
#define IS_NAN(X) ((((X) & 0x000fffffffffffffULL) != 0) && (((X) & 0x7ff0000000000000ULL) == 0x7ff0000000000000ULL))
// IS_QNAN: a NaN whose most significant fraction bit is set (quiet NaN).
#define IS_QNAN(X) ((((X) & 0x000fffffffffffffULL) != 0) && (((X) & 0x7ff8000000000000ULL) == 0x7ff8000000000000ULL))
// IS_SNAN: a NaN whose most significant fraction bit is clear (signaling NaN).
#define IS_SNAN(X) ((((X) & 0x000fffffffffffffULL) != 0) && (((X) & 0x7ff8000000000000ULL) == 0x7ff0000000000000ULL))
// FPSCR bit mask consulted by FCMPO when deciding which invalid-operation flags to set.
#define FPSCR_VE (1 << 7)
// Reduces the mantissa precision of a double by operating on its bit pattern:
// the low 27 bits are cleared and bit 27 of the input is then added on top of the
// result, rounding the remaining mantissa up when that bit was set.
// Used on the frC multiplier operand of the single-precision multiply instructions.
inline double roundTo25BitAccuracy(double d)
{
	uint64 bits = *(uint64*)&d;
	const uint64 keptBits = bits & 0xFFFFFFFFF8000000ULL;
	const uint64 roundingBit = bits & 0x8000000ULL;
	bits = keptBits + roundingBit;
	return *(double*)&bits;
}
double fres_espresso(double input);
double frsqrte_espresso(double input);
void fcmpu_espresso(PPCInterpreter_t* hCPU, int crfD, double a, double b);
// OPC
void PPCInterpreter_virtualHLE(PPCInterpreter_t* hCPU, unsigned int opcode);
void PPCInterpreter_MFMSR(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_MTMSR(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_MFTB(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_MTFSB1X(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_MFCR(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_MCRF(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_MTCRF(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_MCRXR(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_TLBIE(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_TLBSYNC(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_DCBT(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_DCBST(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_DCBZL(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_DCBF(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_DCBI(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_DCBZ(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_ICBI(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_EIEIO(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_SC(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_SYNC(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_ISYNC(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_RFI(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_BX(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_BCX(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_BCLRX(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_BCCTR(PPCInterpreter_t* hCPU, uint32 Opcode);
// FPU
void PPCInterpreter_FCMPO(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FCMPU(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FMR(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FSEL(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FCTIWZ(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FCTIW(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FNEG(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FRSP(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FRSQRTE(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FRES(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FABS(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FNABS(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FADD(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FMUL(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FDIV(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FSUB(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FMADD(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FMSUB(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FMSUBS(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FNMADD(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FNMSUB(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_MFFS(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_MTFSF(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FDIVS(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FMULS(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FADDS(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FSUBS(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FMADDS(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FNMADDS(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_FNMSUBS(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_MERGE00(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_MERGE01(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_MERGE10(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_MERGE11(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_MR(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_NEG(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_ABS(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_NABS(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_RES(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_RSQRTE(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_ADD(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_SUB(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_MUL(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_DIV(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_MADD(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_NMADD(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_MADDS0(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_MADDS1(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_MSUB(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_NMSUB(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_SEL(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_SUM0(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_SUM1(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_MULS0(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_MULS1(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_CMPO0(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_CMPU0(PPCInterpreter_t* hCPU, uint32 Opcode);
void PPCInterpreter_PS_CMPU1(PPCInterpreter_t* hCPU, uint32 Opcode);

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,107 @@
#include "PPCInterpreterInternal.h"
#include "Cafe/OS/RPL/rpl.h"
#include "Cafe/GameProfile/GameProfile.h"
#include "Cafe/HW/Espresso/Debugger/Debugger.h"
thread_local PPCInterpreter_t* ppcInterpreterCurrentInstance;
// main thread instruction counter and timing
volatile uint64 ppcMainThreadCycleCounter = 0;
uint64 ppcMainThreadDECCycleValue = 0; // value that was set to dec register
uint64 ppcMainThreadDECCycleStart = 0; // at which cycle the dec register was set, if == 0 -> dec is 0
uint64 ppcCyclesSince2000 = 0;
uint64 ppcCyclesSince2000TimerClock = 0;
uint64 ppcCyclesSince2000_UTC = 0;
// Allocates and zero-initializes a new interpreter state that starts executing at Entrypoint.
// The returned pointer is NOT the start of the malloc'ed block: it points prefixAreaSize
// bytes into it, so it must never be passed to free() directly.
// NOTE(review): the malloc result is not checked for nullptr.
PPCInterpreter_t* PPCInterpreter_createInstance(unsigned int Entrypoint)
{
	// we need to allocate some bytes before the interpreter struct because the recompiler will use it as stack area (specifically when the exception handler is called)
	const uint32 prefixAreaSize = 0x6000;
	uint8* rawAllocation = (uint8*)malloc(sizeof(PPCInterpreter_t) + prefixAreaSize);
	PPCInterpreter_t* instance = (PPCInterpreter_t*)(rawAllocation + prefixAreaSize);
	memset((void*)instance, 0x00, sizeof(PPCInterpreter_t));
	// execution starts at the entrypoint with a zeroed stack pointer and link register
	instance->instructionPointer = (uint32)Entrypoint;
	instance->gpr[GPR_SP] = 0x00000000;
	instance->spr.LR = 0;
	return instance;
}
// Returns the interpreter instance bound to the calling host thread (thread_local pointer).
PPCInterpreter_t* PPCInterpreter_getCurrentInstance()
{
	PPCInterpreter_t* const current = ppcInterpreterCurrentInstance;
	return current;
}
// Returns the current cycle counter, as reported by PPCTimer_getFromRDTSC().
__declspec(noinline) uint64 PPCInterpreter_getMainCoreCycleCounter()
{
	const uint64 cycles = PPCTimer_getFromRDTSC();
	return cycles;
}
// Advances the instruction pointer by one instruction (4 bytes).
void PPCInterpreter_nextInstruction(PPCInterpreter_t* cpuInterpreter)
{
	cpuInterpreter->instructionPointer = cpuInterpreter->instructionPointer + 4;
}
// Sets the instruction pointer to newIP (no alignment or range checks are performed).
void PPCInterpreter_jumpToInstruction(PPCInterpreter_t* cpuInterpreter, uint32 newIP)
{
	cpuInterpreter->instructionPointer = newIP;
}
// Writes the decrementer register and records, in the global DEC bookkeeping variables,
// the value that was written and the cycle count at which the write happened.
void PPCInterpreter_setDEC(PPCInterpreter_t* hCPU, uint32 newValue)
{
	hCPU->sprExtended.DEC = newValue;
	const uint64 cycleOfWrite = PPCInterpreter_getMainCoreCycleCounter();
	ppcMainThreadDECCycleStart = cycleOfWrite;
	ppcMainThreadDECCycleValue = newValue;
}
// Returns the architectural XER value. The carry bit is kept separately in
// hCPU->xer_ca, so it is merged into bit XER_BIT_CA of the stored XER here.
uint32 PPCInterpreter_getXER(PPCInterpreter_t* hCPU)
{
	const uint32 carryMask = (1 << XER_BIT_CA);
	const uint32 xerWithoutCarry = hCPU->spr.XER & ~carryMask;
	return hCPU->xer_ca ? (xerWithoutCarry | carryMask) : xerWithoutCarry;
}
// Stores a new XER value and refreshes the separately cached carry flag from bit XER_BIT_CA.
void PPCInterpreter_setXER(PPCInterpreter_t* hCPU, uint32 v)
{
	const uint32 carryBit = (v >> XER_BIT_CA) & 1;
	hCPU->spr.XER = v;
	hCPU->xer_ca = carryBit;
}
// Returns the core index of the given interpreter instance (read from its UPIR register).
uint32 PPCInterpreter_getCoreIndex(PPCInterpreter_t* hCPU)
{
	const uint32 coreIndex = hCPU->spr.UPIR;
	return coreIndex;
}
// Returns the core index (UPIR) of the instance bound to the calling thread.
// The thread's current instance must be set; it is dereferenced without a null check.
uint32 PPCInterpreter_getCurrentCoreIndex()
{
	PPCInterpreter_t* const current = ppcInterpreterCurrentInstance;
	return current->spr.UPIR;
}
// Returns a host pointer to the guest address held in r1 of the current thread's instance.
uint8* PPCInterpreterGetStackPointer()
{
	const auto guestStackAddress = ppcInterpreterCurrentInstance->gpr[1];
	return memory_getPointerFromVirtualOffset(guestStackAddress);
}
// Lowers r1 of the current thread's instance by offset bytes and returns a host
// pointer to the new (lowered) guest stack address.
uint8* PPCInterpreterGetAndModifyStackPointer(sint32 offset)
{
	auto& guestStackPointer = ppcInterpreterCurrentInstance->gpr[1];
	guestStackPointer -= offset;
	return memory_getPointerFromVirtualOffset(guestStackPointer);
}
// Lowers r1 of the current thread's instance by offset bytes (a negative offset raises it).
void PPCInterpreterModifyStackPointer(sint32 offset)
{
	auto& guestStackPointer = ppcInterpreterCurrentInstance->gpr[1];
	guestStackPointer -= offset;
}
uint32 RPLLoader_MakePPCCallable(void(*ppcCallableExport)(PPCInterpreter_t* hCPU));
// deprecated wrapper, use RPLLoader_MakePPCCallable directly
// Deprecated forwarding wrapper: new code should call RPLLoader_MakePPCCallable directly.
uint32 PPCInterpreter_makeCallableExportDepr(void (*ppcCallableExport)(PPCInterpreter_t* hCPU))
{
	const uint32 callableAddress = RPLLoader_MakePPCCallable(ppcCallableExport);
	return callableAddress;
}

View file

@ -0,0 +1,360 @@
#include "../PPCState.h"
#include "PPCInterpreterInternal.h"
#include "PPCInterpreterHelper.h"
#include "Cafe/OS/libs/coreinit/coreinit_CodeGen.h"
#include "../Recompiler/PPCRecompiler.h"
#include "../Recompiler/PPCRecompilerX64.h"
#include <float.h>
#include "Cafe/HW/Latte/Core/LatteBufferCache.h"
// mfmsr: copies the machine state register into rD.
// When MSR[PR] is set the function returns early: nothing is written and the
// instruction pointer is left unchanged.
void PPCInterpreter_MFMSR(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	forceLogDebug_printf("Rare instruction: MFMSR");
	const bool problemStateSet = (hCPU->sprExtended.msr & MSR_PR) != 0;
	if (problemStateSet)
	{
		PPC_ASSERT(true);
		return;
	}
	const uint32 rD = (Opcode >> 21) & 0x1f;
	hCPU->gpr[rD] = hCPU->sprExtended.msr;
	PPCInterpreter_nextInstruction(hCPU);
}
// mtmsr: copies rS into the machine state register.
// When MSR[PR] is set the function returns early: nothing is written and the
// instruction pointer is left unchanged.
void PPCInterpreter_MTMSR(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	forceLogDebug_printf("Rare instruction: MTMSR");
	const bool problemStateSet = (hCPU->sprExtended.msr & MSR_PR) != 0;
	if (problemStateSet)
	{
		PPC_ASSERT(true);
		return;
	}
	const uint32 rS = (Opcode >> 21) & 0x1f;
	hCPU->sprExtended.msr = hCPU->gpr[rS];
	PPCInterpreter_nextInstruction(hCPU);
}
// mtfsb1[.]: sets FPSCR bit crbD (MSB-first numbering). Bits 1 and 2 are skipped.
// The record form (Rc=1) would have to update cr1, which is not implemented.
void PPCInterpreter_MTFSB1X(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	forceLogDebug_printf("Rare instruction: MTFSB1X");
	const int crbD = (Opcode >> 21) & 0x1f;
	const bool isWritableBit = (crbD != 1) && (crbD != 2);
	if (isWritableBit)
		hCPU->fpscr |= 1 << (31 - crbD);
	if ((Opcode & PPC_OPC_RC) != 0)
	{
		// update cr1 flags
		PPC_ASSERT(true);
	}
	PPCInterpreter_nextInstruction(hCPU);
}
// mcrf: copies the four bits of CR field crfS into CR field crfD.
void PPCInterpreter_MCRF(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	// the 3-bit field numbers sit in the upper bits of the 5-bit operand slots
	const uint32 dstField = (Opcode >> 23) & 0x7;
	const uint32 srcField = (Opcode >> 18) & 0x7;
	const uint32 dstBase = dstField * 4;
	const uint32 srcBase = srcField * 4;
	for (uint32 bit = 0; bit < 4; ++bit)
	{
		const uint8 value = ppc_getCRBit(hCPU, srcBase + bit);
		ppc_setCRBit(hCPU, dstBase + bit, value);
	}
	PPCInterpreter_nextInstruction(hCPU);
}
// mfcr: packs the condition register into rD.
// frequently used by GCC compiled code (e.g. SM64 port)
void PPCInterpreter_MFCR(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	const uint32 rD = (Opcode >> 21) & 0x1f;
	// ppc_getCR performs the packing: array index 0 (cr0.LT) becomes the MSB of the GPR value
	hCPU->gpr[rD] = ppc_getCR(hCPU);
	PPCInterpreter_nextInstruction(hCPU);
}
// mtcrf: copies the CR fields selected by the 8-bit field mask from rS into the
// condition register; unselected fields keep their value.
// frequently used by GCC compiled code (e.g. SM64 port)
void PPCInterpreter_MTCRF(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	uint32 rS;
	uint32 fieldMask;
	PPC_OPC_TEMPL_XFX(Opcode, rS, fieldMask);
	const uint32 source = hCPU->gpr[rS];
	for (uint32 field = 0; field < 8; ++field)
	{
		if (!ppc_MTCRFMaskHasCRFieldSet(fieldMask, field))
			continue;
		// CR bit n (array index n) corresponds to bit (31 - n) of the source register
		for (uint32 bit = 0; bit < 4; ++bit)
		{
			const uint32 crBit = field * 4 + bit;
			ppc_setCRBit(hCPU, crBit, (source >> (31 - crBit)) & 1);
		}
	}
	PPCInterpreter_nextInstruction(hCPU);
}
// mcrxr: copies XER[0..3] (SO, OV, CA and the bit after CA) into CR field crfD and
// then clears those four XER bits.
// used in Dont Starve: Giant Edition
// also used frequently by Web Browser (webkit?)
//
// Two fixes compared to the previous version:
//  - crfD is a 3-bit field and is used as-is. It used to be shifted right by two a
//    second time, so every destination collapsed to cr0 or cr1.
//  - the bit order is no longer reversed: XER bit 0 (SO, the MSB) goes to the first
//    bit of the CR field (array index crfD*4 + 0), XER bit 3 to the last.
void PPCInterpreter_MCRXR(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	const uint32 crfD = (Opcode >> (31 - 8)) & 7;
	const uint32 xer = PPCInterpreter_getXER(hCPU);
	const uint32 xerBits = (xer >> 28) & 0xF; // bit 3 = XER[0] (SO) ... bit 0 = XER[3]
	ppc_setCRBit(hCPU, crfD * 4 + 0, (xerBits >> 3) & 1);
	ppc_setCRBit(hCPU, crfD * 4 + 1, (xerBits >> 2) & 1);
	ppc_setCRBit(hCPU, crfD * 4 + 2, (xerBits >> 1) & 1);
	ppc_setCRBit(hCPU, crfD * 4 + 3, (xerBits >> 0) & 1);
	// reset copied bits
	PPCInterpreter_setXER(hCPU, xer & ~0xF0000000);
	PPCInterpreter_nextInstruction(hCPU);
}
// tlbie: treated as a no-op; the operands are not needed, only the instruction pointer advances.
void PPCInterpreter_TLBIE(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	(void)Opcode;
	PPCInterpreter_nextInstruction(hCPU);
}
// tlbsync: not implemented - trips the "unimplemented" assert, then skips the instruction.
void PPCInterpreter_TLBSYNC(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	(void)Opcode;
	cemu_assert_unimplemented();
	PPCInterpreter_nextInstruction(hCPU);
}
// branch instructions
// b / ba / bl / bla: unconditional branch.
// The sign-extended displacement is relative to the current instruction unless AA is
// set. With LK set, LR receives the address of the following instruction and the
// recompiler gets a chance to take over at the call target.
void PPCInterpreter_BX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	uint32 target;
	PPC_OPC_TEMPL_I(Opcode, target);
	const bool isAbsolute = (Opcode & PPC_OPC_AA) != 0;
	if (!isAbsolute)
		target += (unsigned int)hCPU->instructionPointer;

	const bool isCall = (Opcode & PPC_OPC_LK) != 0;
	if (!isCall)
	{
		PPCInterpreter_jumpToInstruction(hCPU, target);
		return;
	}
	// update LR and IP
	hCPU->spr.LR = (unsigned int)hCPU->instructionPointer + 4;
	PPCInterpreter_jumpToInstruction(hCPU, target);
	PPCRecompiler_attemptEnter(hCPU, target);
}
// bc / bcl: conditional branch with a relative 16-bit displacement.
// BO controls the test: unless BO&4 is set, CTR is decremented and compared against
// zero (sense selected by BO&2); unless BO&16 is set, CR bit BI must equal BO&8.
// The absolute form (AA=1) is not implemented.
void PPCInterpreter_BCX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	uint32 BO, BI, BD;
	PPC_OPC_TEMPL_B(Opcode, BO, BI, BD);
	const bool ignoreCtr = (BO & 4) != 0;
	if (!ignoreCtr)
		hCPU->spr.CTR--;
	const bool branchIfCtrZero = (BO & 2) != 0;
	const bool expectedCrValue = (BO & 8) != 0; // branch condition true
	const bool crValue = ppc_getCRBit(hCPU, BI) != 0;

	const bool ctrOk = ignoreCtr || ((hCPU->spr.CTR != 0) != branchIfCtrZero);
	const bool condOk = ((BO & 16) != 0) || (crValue == expectedCrValue);
	if (!(ctrOk && condOk))
	{
		PPCInterpreter_nextInstruction(hCPU);
		return;
	}

	if ((Opcode & PPC_OPC_AA) != 0)
	{
		// should never happen
		cemu_assert_unimplemented();
	}
	else
	{
		BD += (unsigned int)hCPU->instructionPointer;
	}
	if (Opcode & PPC_OPC_LK)
		hCPU->spr.LR = ((unsigned int)hCPU->instructionPointer) + 4;
	PPCInterpreter_jumpToInstruction(hCPU, BD);
}
// bclr / bclrl: conditional branch to the address in LR (low two bits masked off).
// The condition is evaluated like for bc: optional CTR decrement/test and optional CR
// bit test, both selected through BO. When the branch is taken the recompiler gets a
// chance to take over at the target.
void PPCInterpreter_BCLRX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	uint32 BO, BI, BD;
	PPC_OPC_TEMPL_XL(Opcode, BO, BI, BD);
	PPC_ASSERT(BD == 0);
	const bool ignoreCtr = (BO & 4) != 0;
	if (!ignoreCtr)
	{
		if (hCPU->spr.CTR == 0)
		{
			// CTR is about to wrap around
			PPC_ASSERT(true);
			forceLogDebug_printf("Decrementer underflow!\n");
		}
		hCPU->spr.CTR--;
	}
	const bool branchIfCtrZero = (BO & 2) != 0;
	const bool expectedCrValue = (BO & 8) != 0;
	const bool crValue = ppc_getCRBit(hCPU, BI) != 0;

	const bool ctrOk = ignoreCtr || ((hCPU->spr.CTR != 0) != branchIfCtrZero);
	const bool condOk = ((BO & 16) != 0) || (crValue == expectedCrValue);
	if (!(ctrOk && condOk))
	{
		PPCInterpreter_nextInstruction(hCPU);
		return;
	}

	// read the target before LR is possibly overwritten by the link update
	const uint32 target = hCPU->spr.LR & 0xfffffffc;
	if (Opcode & PPC_OPC_LK)
		hCPU->spr.LR = (unsigned int)hCPU->instructionPointer + 4;
	PPCInterpreter_jumpToInstruction(hCPU, target);
	PPCRecompiler_attemptEnter(hCPU, target);
}
// bcctr / bcctrl: conditional branch to the address in CTR (low two bits masked off).
// Only the CR part of the BO condition is evaluated here. When the branch is taken
// the recompiler gets a chance to take over at the target.
void PPCInterpreter_BCCTR(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	uint32 BO, BI, BD;
	PPC_OPC_TEMPL_XL(Opcode, BO, BI, BD);
	PPC_ASSERT(BD == 0);
	PPC_ASSERT(!(BO & 2));
	const bool expectedCrValue = (BO & 8) != 0;
	const bool crValue = ppc_getCRBit(hCPU, BI) != 0;
	const bool taken = ((BO & 16) != 0) || (crValue == expectedCrValue);
	if (!taken)
	{
		hCPU->instructionPointer += 4;
		return;
	}
	if (Opcode & PPC_OPC_LK)
		hCPU->spr.LR = (unsigned int)hCPU->instructionPointer + 4;
	hCPU->instructionPointer = (unsigned int)(hCPU->spr.CTR & 0xfffffffc);
	PPCRecompiler_attemptEnter(hCPU, hCPU->instructionPointer);
}
// dcbt: treated as a no-op; the operands are not needed, only the instruction pointer advances.
void PPCInterpreter_DCBT(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	(void)Opcode;
	PPCInterpreter_nextInstruction(hCPU);
}
// dcbst: reports a 32-byte data cache flush at the effective address to the Latte buffer cache.
// EA = (rA|0) + rB, i.e. rA == 0 means "no base register".
void PPCInterpreter_DCBST(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	const sint32 rA = (Opcode >> 16) & 0x1F;
	const sint32 rB = (Opcode >> 11) & 0x1F;
	uint32 effectiveAddress = hCPU->gpr[rB];
	if (rA != 0)
		effectiveAddress += hCPU->gpr[rA];
	LatteBufferCache_notifyDCFlush(effectiveAddress, 32);
	PPCInterpreter_nextInstruction(hCPU);
}
// dcbf: reports a 32-byte data cache flush at the effective address to the Latte buffer cache.
// EA = (rA|0) + rB, i.e. rA == 0 means "no base register".
void PPCInterpreter_DCBF(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	const sint32 rA = (Opcode >> 16) & 0x1F;
	const sint32 rB = (Opcode >> 11) & 0x1F;
	uint32 effectiveAddress = hCPU->gpr[rB];
	if (rA != 0)
		effectiveAddress += hCPU->gpr[rA];
	LatteBufferCache_notifyDCFlush(effectiveAddress, 32);
	PPCInterpreter_nextInstruction(hCPU);
}
// dcbz_l (undocumented instruction): treated as a no-op, no memory is modified.
void PPCInterpreter_DCBZL(PPCInterpreter_t* hCPU, uint32 Opcode) //Undocumented
{
	(void)Opcode;
	PPCInterpreter_nextInstruction(hCPU);
}
// dcbi: treated as a no-op.
void PPCInterpreter_DCBI(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	(void)Opcode;
	PPCInterpreter_nextInstruction(hCPU);
}
// icbi: forwards the effective address to coreinit's code-gen handler so it can
// invalidate the affected range. EA = (rA|0) + rB.
void PPCInterpreter_ICBI(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	const sint32 rA = (Opcode >> 16) & 0x1f;
	const sint32 rB = (Opcode >> 11) & 0x1f;
	uint32 effectiveAddress = hCPU->gpr[rB];
	if (rA != 0)
		effectiveAddress += hCPU->gpr[rA];
	// invalidate range
	coreinit::codeGenHandleICBI(effectiveAddress);
	PPCInterpreter_nextInstruction(hCPU);
}
// eieio: has no effect in the interpreter.
void PPCInterpreter_EIEIO(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	(void)Opcode;
	PPCInterpreter_nextInstruction(hCPU);
}
// sc: system calls are not dispatched; the occurrence is logged and execution
// continues with the following instruction.
void PPCInterpreter_SC(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	(void)Opcode;
	forceLogDebug_printf("SC executed at 0x%08x", hCPU->instructionPointer);
	PPCInterpreter_nextInstruction(hCPU);
}
// sync: treated as a no-op.
void PPCInterpreter_SYNC(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	(void)Opcode;
	PPCInterpreter_nextInstruction(hCPU);
}
// isync: treated as a no-op.
void PPCInterpreter_ISYNC(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	(void)Opcode;
	PPCInterpreter_nextInstruction(hCPU);
}
// rfi: return from interrupt.
// The MSR bits selected by 0x87C0FF73 are replaced with the corresponding bits of
// SRR1, bit 0x00040000 is cleared, MSR[RI] is set, and execution resumes at SRR0.
void PPCInterpreter_RFI(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	(void)Opcode;
	forceLogDebug_printf("RFI");
	const uint32 restoredFromSrr1 = 0x87C0FF73;
	const uint32 alsoCleared = 0x00040000;
	hCPU->sprExtended.msr &= ~(restoredFromSrr1 | alsoCleared);
	hCPU->sprExtended.msr |= hCPU->sprExtended.srr1 & restoredFromSrr1;
	hCPU->sprExtended.msr |= MSR_RI;
	hCPU->instructionPointer = (unsigned int)(hCPU->sprExtended.srr0);
}

View file

@ -0,0 +1,73 @@
// mfspr: reads a special purpose register into rD.
// The 10-bit SPR number is encoded as two swapped 5-bit halves in the opcode.
static void PPCInterpreter_MFSPR(PPCInterpreter_t* hCPU, uint32 opcode)
{
	uint32 rD, sprLow, sprHigh;
	PPC_OPC_TEMPL_XO(opcode, rD, sprLow, sprHigh);
	const uint32 sprId = sprLow | (sprHigh << 5);
	hCPU->gpr[rD] = PPCSpr_get(hCPU, sprId);
	PPCInterpreter_nextInstruction(hCPU);
}
// mtspr: writes a GPR into a special purpose register.
// The 10-bit SPR number is encoded as two swapped 5-bit halves in the opcode.
static void PPCInterpreter_MTSPR(PPCInterpreter_t* hCPU, uint32 opcode)
{
	uint32 rS, sprLow, sprHigh;
	PPC_OPC_TEMPL_XO(opcode, rS, sprLow, sprHigh);
	const uint32 sprId = sprLow | (sprHigh << 5);
	PPCSpr_set(hCPU, sprId, hCPU->gpr[rS]);
	PPCInterpreter_nextInstruction(hCPU);
}
// mfsr: reads segment register SR (low 4 bits of the operand field) into rD.
static void PPCInterpreter_MFSR(PPCInterpreter_t* hCPU, uint32 opcode)
{
	const uint32 rD = (opcode >> 21) & 0x1f;
	const uint32 segmentIndex = (opcode >> 16) & 0xF;
	hCPU->gpr[rD] = getSR(hCPU, segmentIndex);
	PPCInterpreter_nextInstruction(hCPU);
}
// mtsr: writes rS into segment register SR (low 4 bits of the operand field).
static void PPCInterpreter_MTSR(PPCInterpreter_t* hCPU, uint32 opcode)
{
	const uint32 rS = (opcode >> 21) & 0x1f;
	const uint32 segmentIndex = (opcode >> 16) & 0xF;
	setSR(hCPU, segmentIndex, hCPU->gpr[rS]);
	PPCInterpreter_nextInstruction(hCPU);
}
// mftb: reads the lower (TBL) or upper (TBU) 32 bits of the 64-bit time base into rD.
// Any other register number trips a debug assert and leaves rD untouched.
static void PPCInterpreter_MFTB(PPCInterpreter_t* hCPU, uint32 opcode)
{
	uint32 rD, sprLow, sprHigh;
	PPC_OPC_TEMPL_XO(opcode, rD, sprLow, sprHigh);
	const uint32 timeBaseRegister = sprLow | (sprHigh << 5);
	// get core cycle counter
	const uint64 timeBase = ppcItpCtrl::getTB(hCPU);
	if (timeBaseRegister == SPR_TBL)
	{
		hCPU->gpr[rD] = (uint32)(timeBase & 0xFFFFFFFF);
	}
	else if (timeBaseRegister == SPR_TBU)
	{
		hCPU->gpr[rD] = (uint32)((timeBase >> 32) & 0xFFFFFFFF);
	}
	else
	{
		assert_dbg();
	}
	PPCInterpreter_nextInstruction(hCPU);
}
// tw: hands control to the debugger. Only the encoding with TO == 0 is expected.
// The instruction pointer is not advanced here.
static void PPCInterpreter_TW(PPCInterpreter_t* hCPU, uint32 opcode)
{
	const sint32 trapCondition = (opcode >> 21) & 0x1f;
	cemu_assert_debug(trapCondition == 0);
	debugger_enterTW(hCPU);
}

View file

@ -0,0 +1,506 @@
#include "PPCInterpreterInternal.h"
// Gekko paired single math
// ps_add: frD = frA + frB for both paired-single slots, each sum narrowed to float.
void PPCInterpreter_PS_ADD(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	auto& dst = hCPU->fpr[regD];
	const auto& lhs = hCPU->fpr[regA];
	const auto& rhs = hCPU->fpr[regB];
	dst.fp0 = (float)(lhs.fp0 + rhs.fp0);
	dst.fp1 = (float)(lhs.fp1 + rhs.fp1);
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_sub: frD = frA - frB for both paired-single slots, each difference narrowed to float.
void PPCInterpreter_PS_SUB(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	auto& dst = hCPU->fpr[regD];
	const auto& lhs = hCPU->fpr[regA];
	const auto& rhs = hCPU->fpr[regB];
	dst.fp0 = (float)(lhs.fp0 - rhs.fp0);
	dst.fp1 = (float)(lhs.fp1 - rhs.fp1);
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_mul: frD = frA * frC for both slots. The frC operand goes through roundTo25BitAccuracy first;
// the product is narrowed to float and then passed through flushDenormalToZero.
void PPCInterpreter_PS_MUL(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 regC = (Opcode >> 6) & 0x1F;
	auto& dst = hCPU->fpr[regD];
	const auto& lhs = hCPU->fpr[regA];
	const auto& factor = hCPU->fpr[regC];
	dst.fp0 = flushDenormalToZero((float)(lhs.fp0 * roundTo25BitAccuracy(factor.fp0)));
	dst.fp1 = flushDenormalToZero((float)(lhs.fp1 * roundTo25BitAccuracy(factor.fp1)));
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_div: frD = frA / frB for both paired-single slots, each quotient narrowed to float.
void PPCInterpreter_PS_DIV(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	auto& dst = hCPU->fpr[regD];
	const auto& dividend = hCPU->fpr[regA];
	const auto& divisor = hCPU->fpr[regB];
	dst.fp0 = (float)(dividend.fp0 / divisor.fp0);
	dst.fp1 = (float)(dividend.fp1 / divisor.fp1);
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_madd: frD = frA * frC + frB for both slots.
// The product is narrowed to float before the addition, the sum is narrowed again,
// and the final value goes through flushDenormalToZero.
void PPCInterpreter_PS_MADD(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	const sint32 regC = (Opcode >> 6) & 0x1F;
	// both results are computed before frD is written, so frD may alias any source register
	const float result0 = (float)((float)(hCPU->fpr[regA].fp0 * roundTo25BitAccuracy(hCPU->fpr[regC].fp0)) + hCPU->fpr[regB].fp0);
	const float result1 = (float)((float)(hCPU->fpr[regA].fp1 * roundTo25BitAccuracy(hCPU->fpr[regC].fp1)) + hCPU->fpr[regB].fp1);
	hCPU->fpr[regD].fp0 = flushDenormalToZero(result0);
	hCPU->fpr[regD].fp1 = flushDenormalToZero(result1);
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_nmadd: frD = -(frA * frC + frB) for both slots, with frC passed through roundTo25BitAccuracy.
void PPCInterpreter_PS_NMADD(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	const sint32 regC = (Opcode >> 6) & 0x1F;
	// both results are computed before frD is written, so frD may alias any source register
	const float result0 = (float)-(hCPU->fpr[regA].fp0 * roundTo25BitAccuracy(hCPU->fpr[regC].fp0) + hCPU->fpr[regB].fp0);
	const float result1 = (float)-(hCPU->fpr[regA].fp1 * roundTo25BitAccuracy(hCPU->fpr[regC].fp1) + hCPU->fpr[regB].fp1);
	hCPU->fpr[regD].fp0 = result0;
	hCPU->fpr[regD].fp1 = result1;
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_msub: frD = frA * frC - frB for both slots, with frC passed through roundTo25BitAccuracy.
void PPCInterpreter_PS_MSUB(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	const sint32 regC = (Opcode >> 6) & 0x1F;
	// both results are computed before frD is written, so frD may alias any source register
	const float result0 = (float)(hCPU->fpr[regA].fp0 * roundTo25BitAccuracy(hCPU->fpr[regC].fp0) - hCPU->fpr[regB].fp0);
	const float result1 = (float)(hCPU->fpr[regA].fp1 * roundTo25BitAccuracy(hCPU->fpr[regC].fp1) - hCPU->fpr[regB].fp1);
	hCPU->fpr[regD].fp0 = result0;
	hCPU->fpr[regD].fp1 = result1;
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_nmsub: frD = -(frA * frC - frB) for both slots, with frC passed through roundTo25BitAccuracy.
void PPCInterpreter_PS_NMSUB(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	const sint32 regC = (Opcode >> 6) & 0x1F;
	// both results are computed before frD is written, so frD may alias any source register
	const float result0 = (float)-(hCPU->fpr[regA].fp0 * roundTo25BitAccuracy(hCPU->fpr[regC].fp0) - hCPU->fpr[regB].fp0);
	const float result1 = (float)-(hCPU->fpr[regA].fp1 * roundTo25BitAccuracy(hCPU->fpr[regC].fp1) - hCPU->fpr[regB].fp1);
	hCPU->fpr[regD].fp0 = result0;
	hCPU->fpr[regD].fp1 = result1;
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_madds0: multiply-add where slot 0 of frC is the multiplier for both slots:
// frD.fp0 = frA.fp0 * frC.fp0 + frB.fp0, frD.fp1 = frA.fp1 * frC.fp0 + frB.fp1.
void PPCInterpreter_PS_MADDS0(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	const sint32 regC = (Opcode >> 6) & 0x1F;
	const double scalar = roundTo25BitAccuracy(hCPU->fpr[regC].fp0);
	// both results are computed before frD is written, so frD may alias any source register
	const float result0 = (float)(hCPU->fpr[regA].fp0 * scalar + hCPU->fpr[regB].fp0);
	const float result1 = (float)(hCPU->fpr[regA].fp1 * scalar + hCPU->fpr[regB].fp1);
	hCPU->fpr[regD].fp0 = result0;
	hCPU->fpr[regD].fp1 = result1;
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_madds1: multiply-add where slot 1 of frC is the multiplier for both slots:
// frD.fp0 = frA.fp0 * frC.fp1 + frB.fp0, frD.fp1 = frA.fp1 * frC.fp1 + frB.fp1.
void PPCInterpreter_PS_MADDS1(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	const sint32 regC = (Opcode >> 6) & 0x1F;
	const double scalar = roundTo25BitAccuracy(hCPU->fpr[regC].fp1);
	// both results are computed before frD is written, so frD may alias any source register
	const float result0 = (float)(hCPU->fpr[regA].fp0 * scalar + hCPU->fpr[regB].fp0);
	const float result1 = (float)(hCPU->fpr[regA].fp1 * scalar + hCPU->fpr[regB].fp1);
	hCPU->fpr[regD].fp0 = result0;
	hCPU->fpr[regD].fp1 = result1;
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_sel: per-slot select. Where frA >= -0.0 the slot is taken from frC, otherwise from frB.
void PPCInterpreter_PS_SEL(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	const sint32 regC = (Opcode >> 6) & 0x1F;
	// slot 0 is fully resolved and stored before slot 1 is evaluated
	const bool pickC0 = hCPU->fpr[regA].fp0 >= -0.0f;
	hCPU->fpr[regD].fp0 = pickC0 ? hCPU->fpr[regC].fp0 : hCPU->fpr[regB].fp0;
	const bool pickC1 = hCPU->fpr[regA].fp1 >= -0.0f;
	hCPU->fpr[regD].fp1 = pickC1 ? hCPU->fpr[regC].fp1 : hCPU->fpr[regB].fp1;
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_sum0: frD.fp0 = frA.fp0 + frB.fp1, frD.fp1 = frC.fp1 (both narrowed to float).
void PPCInterpreter_PS_SUM0(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	const sint32 regC = (Opcode >> 6) & 0x1F;
	// read every source slot before frD is written, so frD may alias any source register
	const float sum = (float)(hCPU->fpr[regA].fp0 + hCPU->fpr[regB].fp1);
	const float passthrough = (float)hCPU->fpr[regC].fp1;
	hCPU->fpr[regD].fp0 = sum;
	hCPU->fpr[regD].fp1 = passthrough;
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_sum1: frD.fp0 = frC.fp0, frD.fp1 = frA.fp0 + frB.fp1 (both narrowed to float).
void PPCInterpreter_PS_SUM1(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	const sint32 regC = (Opcode >> 6) & 0x1F;
	// read every source slot before frD is written, so frD may alias any source register
	const float passthrough = (float)hCPU->fpr[regC].fp0;
	const float sum = (float)(hCPU->fpr[regA].fp0 + hCPU->fpr[regB].fp1);
	hCPU->fpr[regD].fp0 = passthrough;
	hCPU->fpr[regD].fp1 = sum;
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_muls0: multiply both slots of frA by slot 0 of frC (after roundTo25BitAccuracy).
void PPCInterpreter_PS_MULS0(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 regC = (Opcode >> 6) & 0x1F;
	const double scalar = roundTo25BitAccuracy(hCPU->fpr[regC].fp0);
	// both products are computed before frD is written, so frD may alias frA
	const float product0 = (float)(hCPU->fpr[regA].fp0 * scalar);
	const float product1 = (float)(hCPU->fpr[regA].fp1 * scalar);
	hCPU->fpr[regD].fp0 = product0;
	hCPU->fpr[regD].fp1 = product1;
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_muls1: multiply both slots of frA by slot 1 of frC (after roundTo25BitAccuracy).
void PPCInterpreter_PS_MULS1(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 regC = (Opcode >> 6) & 0x1F;
	const double scalar = roundTo25BitAccuracy(hCPU->fpr[regC].fp1);
	// both products are computed before frD is written, so frD may alias frA
	const float product0 = (float)(hCPU->fpr[regA].fp0 * scalar);
	const float product1 = (float)(hCPU->fpr[regA].fp1 * scalar);
	hCPU->fpr[regD].fp0 = product0;
	hCPU->fpr[regD].fp1 = product1;
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_mr: copy both paired-single slots of frB into frD.
void PPCInterpreter_PS_MR(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	auto& dst = hCPU->fpr[regD];
	const auto& src = hCPU->fpr[regB];
	dst.fp0 = src.fp0;
	dst.fp1 = src.fp1;
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_neg: frD = -frB for both paired-single slots.
void PPCInterpreter_PS_NEG(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	auto& dst = hCPU->fpr[regD];
	const auto& src = hCPU->fpr[regB];
	dst.fp0 = -src.fp0;
	dst.fp1 = -src.fp1;
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_abs: clear bit 63 of the integer view of both slots of frB and store the result in frD.
void PPCInterpreter_PS_ABS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	const auto topBit = 1ULL << 63;
	auto& dst = hCPU->fpr[regD];
	const auto& src = hCPU->fpr[regB];
	dst.fp0int = src.fp0int & ~topBit;
	dst.fp1int = src.fp1int & ~topBit;
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_nabs: set bit 63 of the integer view of both slots of frB and store the result in frD.
void PPCInterpreter_PS_NABS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	const auto topBit = 1ULL << 63;
	auto& dst = hCPU->fpr[regD];
	const auto& src = hCPU->fpr[regB];
	dst.fp0int = src.fp0int | topBit;
	dst.fp1int = src.fp1int | topBit;
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_rsqrte: apply frsqrte_espresso to both slots of frB, narrow to float and store in frD.
void PPCInterpreter_PS_RSQRTE(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	auto& dst = hCPU->fpr[regD];
	const auto& src = hCPU->fpr[regB];
	dst.fp0 = (float)frsqrte_espresso(src.fp0);
	dst.fp1 = (float)frsqrte_espresso(src.fp1);
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_merge00: frD.fp0 = frA.fp0, frD.fp1 = frB.fp0.
void PPCInterpreter_PS_MERGE00(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	// read both sources first; frD may be the same register as frA or frB
	const double newSlot0 = hCPU->fpr[regA].fp0;
	const double newSlot1 = hCPU->fpr[regB].fp0;
	hCPU->fpr[regD].fp0 = newSlot0;
	hCPU->fpr[regD].fp1 = newSlot1;
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_merge01: frD.fp0 = frA.fp0, frD.fp1 = frB.fp1.
void PPCInterpreter_PS_MERGE01(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	// read both sources first; frD may be the same register as frA or frB
	const double newSlot0 = hCPU->fpr[regA].fp0;
	const double newSlot1 = hCPU->fpr[regB].fp1;
	hCPU->fpr[regD].fp0 = newSlot0;
	hCPU->fpr[regD].fp1 = newSlot1;
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_merge10: frD.fp0 = frA.fp1, frD.fp1 = frB.fp0.
void PPCInterpreter_PS_MERGE10(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	// read both sources first; frD may be the same register as frA or frB
	const double newSlot0 = hCPU->fpr[regA].fp1;
	const double newSlot1 = hCPU->fpr[regB].fp0;
	hCPU->fpr[regD].fp0 = newSlot0;
	hCPU->fpr[regD].fp1 = newSlot1;
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_merge11: frD.fp0 = frA.fp1, frD.fp1 = frB.fp1.
void PPCInterpreter_PS_MERGE11(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	// read both sources first; frD may be the same register as frA or frB
	const double newSlot0 = hCPU->fpr[regA].fp1;
	const double newSlot1 = hCPU->fpr[regB].fp1;
	hCPU->fpr[regD].fp0 = newSlot0;
	hCPU->fpr[regD].fp1 = newSlot1;
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_res: apply fres_espresso to both slots of frB, narrow to float and store in frD.
void PPCInterpreter_PS_RES(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regD = (Opcode >> 21) & 0x1F;
	const sint32 regB = (Opcode >> 11) & 0x1F;
	auto& dst = hCPU->fpr[regD];
	const auto& src = hCPU->fpr[regB];
	dst.fp0 = (float)fres_espresso(src.fp0);
	dst.fp1 = (float)fres_espresso(src.fp1);
	PPCInterpreter_nextInstruction(hCPU);
}
// PS compare
// ps_cmpo0: compare slot 0 of frA against slot 0 of frB.
// All four bits of CR field crfD are cleared, then exactly one is set:
// SO if either operand is NaN, otherwise LT, GT or EQ.
// The matching 4-bit code (1/8/4/2) is also written to bits 12..15 of FPSCR.
void PPCInterpreter_PS_CMPO0(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regB = (Opcode >> 11) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 crField = (Opcode >> 23) & 0x7;
	double lhs = hCPU->fpr[regA].fp0;
	double rhs = hCPU->fpr[regB].fp0;
	// clear the whole CR field before setting the result bit
	for (sint32 bit = 0; bit < 4; bit++)
		ppc_setCRBit(hCPU, crField * 4 + bit, 0);
	uint32 resultCode = 0;
	if (IS_NAN(*(uint64*)&lhs) || IS_NAN(*(uint64*)&rhs))
	{
		resultCode = 1;
		ppc_setCRBit(hCPU, crField * 4 + CR_BIT_SO, 1);
	}
	else if (lhs < rhs)
	{
		resultCode = 8;
		ppc_setCRBit(hCPU, crField * 4 + CR_BIT_LT, 1);
	}
	else if (lhs > rhs)
	{
		resultCode = 4;
		ppc_setCRBit(hCPU, crField * 4 + CR_BIT_GT, 1);
	}
	else
	{
		resultCode = 2;
		ppc_setCRBit(hCPU, crField * 4 + CR_BIT_EQ, 1);
	}
	const uint32 fpscrOtherBits = hCPU->fpscr & 0xffff0fff;
	hCPU->fpscr = fpscrOtherBits | (resultCode << 12);
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_cmpu0: compare slot 0 of frA and frB via fcmpu_espresso.
// The CR field is passed on already multiplied by 4 (field index kept in bits 2..4).
void PPCInterpreter_PS_CMPU0(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	const sint32 regB = (Opcode >> 11) & 0x1F;
	const sint32 regA = (Opcode >> 16) & 0x1F;
	const sint32 crBitBase = (Opcode >> 21) & (0x7 << 2);
	fcmpu_espresso(hCPU, crBitBase, hCPU->fpr[regA].fp0, hCPU->fpr[regB].fp0);
	PPCInterpreter_nextInstruction(hCPU);
}
// ps_cmpu1: compare slot 1 of frA and frB via fcmpu_espresso.
// The CR field is passed on already multiplied by 4 (field index kept in bits 2..4).
void PPCInterpreter_PS_CMPU1(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	sint32 crfD, frA, frB;
	frB = (Opcode >> 11) & 0x1F;
	frA = (Opcode >> 16) & 0x1F;
	crfD = (Opcode >> 21) & (0x7 << 2);
	// operands are passed straight through; the previous unused local copies were removed
	fcmpu_espresso(hCPU, crfD, hCPU->fpr[frA].fp1, hCPU->fpr[frB].fp1);
	PPCInterpreter_nextInstruction(hCPU);
}

View file

@ -0,0 +1,875 @@
// SPR numbers used by the supervisor-mode SPR accessors in this file.

// write-side SPR ids of the time base (handled via setTBL / setTBU)
#define SPR_TBL_WRITE (284)
#define SPR_TBU_WRITE (285)
// DBAT registers, "U" variants. Entries 0-3 and 4-7 sit in two separate numeric ranges (53x vs 56x/57x),
// and U/L ids of the same entry are adjacent (U even, L odd).
#define SPR_DBATU_0 (536)
#define SPR_DBATU_1 (538)
#define SPR_DBATU_2 (540)
#define SPR_DBATU_3 (542)
#define SPR_DBATU_4 (568)
#define SPR_DBATU_5 (570)
#define SPR_DBATU_6 (572)
#define SPR_DBATU_7 (574)
// DBAT registers, "L" variants
#define SPR_DBATL_0 (537)
#define SPR_DBATL_1 (539)
#define SPR_DBATL_2 (541)
#define SPR_DBATL_3 (543)
#define SPR_DBATL_4 (569)
#define SPR_DBATL_5 (571)
#define SPR_DBATL_6 (573)
#define SPR_DBATL_7 (575)
// IBAT registers, "U" variants (same two-range layout as DBAT)
#define SPR_IBATU_0 (528)
#define SPR_IBATU_1 (530)
#define SPR_IBATU_2 (532)
#define SPR_IBATU_3 (534)
#define SPR_IBATU_4 (560)
#define SPR_IBATU_5 (562)
#define SPR_IBATU_6 (564)
#define SPR_IBATU_7 (566)
// IBAT registers, "L" variants
#define SPR_IBATL_0 (529)
#define SPR_IBATL_1 (531)
#define SPR_IBATL_2 (533)
#define SPR_IBATL_3 (535)
#define SPR_IBATL_4 (561)
#define SPR_IBATL_5 (563)
#define SPR_IBATL_6 (565)
#define SPR_IBATL_7 (567)
// read through getDSISR / getDAR
#define SPR_DSISR (18)
#define SPR_DAR (19)
// SPRG0-3 use consecutive ids
#define SPR_SPRG0 (272)
#define SPR_SPRG1 (273)
#define SPR_SPRG2 (274)
#define SPR_SPRG3 (275)
// HID0 / HID2 are disabled here; SPR_HID0 and SPR_HID2 are still referenced below,
// so they are presumably defined in another header - TODO confirm
//#define SPR_HID0 (1008)
//#define SPR_HID2 (920)
#define SPR_HID4 (1011)
#define SPR_HID5 (944)
#define SPR_L2CR (1017) // L2 cache control
#define SPR_CAR (948) // global
#define SPR_BCR (949) // global
// Returns the value reported for the PVR register. The constant is a guess (see inline note).
static uint32 getPVR(PPCInterpreter_t* hCPU)
{
	const uint32 guessedPVR = 0x70010101; // guessed
	return guessedPVR;
}
// Returns the stored FPECR value of this core.
static uint32 getFPECR(PPCInterpreter_t* hCPU)
{
	const auto& extended = hCPU->sprExtended;
	return extended.fpecr;
}
// Stores a new FPECR value for this core.
static void setFPECR(PPCInterpreter_t* hCPU, uint32 newValue)
{
	auto& extended = hCPU->sprExtended;
	extended.fpecr = newValue;
}
// Write to DEC. The value is only logged; nothing is stored.
static void setDEC(PPCInterpreter_t* hCPU, uint32 newValue)
{
	// no state is updated here (a store into sprExtended.fpecr was disabled in the past)
	debug_printf("Set DEC to 0x%08x\n", newValue);
}
// Returns SPRG[sprgIndex]. The index is not range checked.
static uint32 getSPRG(PPCInterpreter_t* hCPU, uint32 sprgIndex)
{
	const auto& extended = hCPU->sprExtended;
	return extended.sprg[sprgIndex];
}
// Stores newValue into SPRG[sprgIndex]. The index is not range checked.
static void setSPRG(PPCInterpreter_t* hCPU, uint32 sprgIndex, uint32 newValue)
{
	auto& extended = hCPU->sprExtended;
	extended.sprg[sprgIndex] = newValue;
}
// Returns the stored DAR value of this core.
static uint32 getDAR(PPCInterpreter_t* hCPU)
{
	const auto& extended = hCPU->sprExtended;
	return extended.dar;
}
// Returns the stored DSISR value of this core.
static uint32 getDSISR(PPCInterpreter_t* hCPU)
{
	const auto& extended = hCPU->sprExtended;
	return extended.dsisr;
}
// Read of HID0. Not implemented yet: always yields 0.
static uint32 getHID0(PPCInterpreter_t* hCPU)
{
	// todo
	const uint32 placeholder = 0;
	return placeholder;
}
// Write to HID0. Not implemented yet: the value is only logged.
static void setHID0(PPCInterpreter_t* hCPU, uint32 newValue)
{
	debug_printf("Set HID0 to 0x%08x\n", newValue);
	// todo - store / act on the value
}
// Read of HID1. Logs the current instruction pointer and yields 0 (not implemented yet).
static uint32 getHID1(PPCInterpreter_t* hCPU)
{
	debug_printf("Get HID1 IP 0x%08x\n", hCPU->instructionPointer);
	// todo
	const uint32 placeholder = 0;
	return placeholder;
}
// Read of HID2. Logs the current instruction pointer and yields 0 (not implemented yet).
static uint32 getHID2(PPCInterpreter_t* hCPU)
{
	debug_printf("Get HID2 IP 0x%08x\n", hCPU->instructionPointer);
	// todo
	const uint32 placeholder = 0;
	return placeholder;
}
// Write to HID2. Not implemented yet: the value is only logged.
static void setHID2(PPCInterpreter_t* hCPU, uint32 newValue)
{
	debug_printf("Set HID2 to 0x%08x\n", newValue);
	// todo - store / act on the value
}
// Read of HID4. Logs the current instruction pointer and yields 0 (not implemented yet).
static uint32 getHID4(PPCInterpreter_t* hCPU)
{
	debug_printf("Get HID4 IP 0x%08x\n", hCPU->instructionPointer);
	// todo
	const uint32 placeholder = 0;
	return placeholder;
}
// Write to HID4. Not implemented yet: the value is only logged.
static void setHID4(PPCInterpreter_t* hCPU, uint32 newValue)
{
	debug_printf("Set HID4 to 0x%08x\n", newValue);
	// todo - store / act on the value
}
// Read of HID5 (Wii-U only register). Logs the current instruction pointer and yields 0 (not implemented yet).
static uint32 getHID5(PPCInterpreter_t* hCPU)
{
	debug_printf("Get HID5 IP 0x%08x\n", hCPU->instructionPointer);
	// todo
	const uint32 placeholder = 0;
	return placeholder;
}
// Write to HID5 (Wii-U only register). Not implemented yet: the value is only logged.
static void setHID5(PPCInterpreter_t* hCPU, uint32 newValue)
{
	debug_printf("Set HID5 to 0x%08x\n", newValue);
	// todo - store / act on the value
}
// Read of SCR (WiiU mode only?). Not implemented yet: always yields 0,
// even though setSCR keeps the written value in the global SPR state.
static uint32 getSCR(PPCInterpreter_t* hCPU)
{
	// todo
	const uint32 placeholder = 0;
	return placeholder;
}
// Write to SCR (stored in the state shared by all cores).
// Bit 0x80000000 is treated as sticky: once set it stays set in every later write.
// On the 0 -> 1 transition of that bit the first 0x4000 bytes of emulated memory are cleared
// and, on Windows, the lowest page is made read-only for debugging.
static void setSCR(PPCInterpreter_t* hCPU, uint32 newValue)
{
	const uint32 stickyBit = 0x80000000;
	const uint32 oldSCR = hCPU->global->sprGlobal.scr;
	const bool wasSet = (oldSCR & stickyBit) != 0;
	newValue |= (oldSCR & stickyBit); // this bit always sticks?
	const bool isSetNow = (newValue & stickyBit) != 0;
	if (!wasSet && isSetNow)
	{
		// this bit is used to disable bootrom mapping, but we use it to know when to copy the decrypted ancast image into kernel memory
		debug_printf("SCR MSB set. Unmap bootrom?\n");
		//memcpy(memory_base + 0xFFE00000, memory_base + 0x08000000, 0x180000);
		// hack - clear low memory (where bootrom was mapped/loaded)
		memset(memory_base, 0, 0x4000);
		//// todo - normally IOSU sets up some stuff here (probably)
		// for debugging purposes make lowest page read-only
#ifdef _WIN32
		DWORD previousProtection;
		VirtualProtect(memory_base, 0x1000, PAGE_READONLY, &previousProtection);
#endif
	}
	debug_printf("Set SCR to 0x%08x\n", newValue);
	hCPU->global->sprGlobal.scr = newValue;
}
// SCR probably has bits to control following:
// disable bootrom (bit 0x80000000)
// disable PPC OTP
// bits to start the extra cores
// Read of CAR (global register, WiiU mode only). Not implemented yet: always yields 0.
static uint32 getCAR(PPCInterpreter_t* hCPU)
{
	// todo
	const uint32 placeholder = 0;
	return placeholder;
}
// Write to CAR (global register, WiiU mode only). The value is only logged.
static void setCAR(PPCInterpreter_t* hCPU, uint32 newValue)
{
	debug_printf("Set CAR to 0x%08x\n", newValue);
}
// Read of BCR (global register, WiiU mode only). Not implemented yet: always yields 0.
static uint32 getBCR(PPCInterpreter_t* hCPU)
{
	// todo
	const uint32 placeholder = 0;
	return placeholder;
}
// Write to BCR (global register, WiiU mode only). The value is only logged.
static void setBCR(PPCInterpreter_t* hCPU, uint32 newValue)
{
	debug_printf("Set BCR to 0x%08x\n", newValue);
}
// Read of L2CR. Not implemented yet: always yields 0.
static uint32 getL2CR(PPCInterpreter_t* hCPU)
{
	// todo
	const uint32 placeholder = 0;
	return placeholder;
}
// Write to L2CR. Not implemented yet: the value is discarded.
static void setL2CR(PPCInterpreter_t* hCPU, uint32 newValue)
{
	// todo - intentionally empty
}
// Stores a new SRR0 value for this core.
static void setSRR0(PPCInterpreter_t* hCPU, uint32 newValue)
{
	auto& extended = hCPU->sprExtended;
	extended.srr0 = newValue;
}
// Stores a new SRR1 value for this core.
static void setSRR1(PPCInterpreter_t* hCPU, uint32 newValue)
{
	auto& extended = hCPU->sprExtended;
	extended.srr1 = newValue;
}
// Stores a new DMAU value. No transfer is started here; setDMAL reads this value when it triggers one.
static void setDMAU(PPCInterpreter_t* hCPU, uint32 newValue)
{
	auto& extended = hCPU->sprExtended;
	extended.dmaU = newValue;
}
// Write to DMAL. The value is stored, and if bit 0x2 is set a locked-cache DMA transfer is
// executed immediately using the current DMAU value:
//  - memory address: DMAU with the low 5 bits cleared
//  - cache address:  DMAL with the low 5 bits cleared (expected to lie in the 0xExxxxxxx range)
//  - length:         (low 5 bits of DMAU << 2) | bits 2..3 of DMAL, in 32-byte units; 0 means 128 units
//  - direction:      bit 4 of DMAL set = memory -> cache, clear = cache -> memory
// After the copy, bit 0x2 is cleared in the stored DMAL value.
static void setDMAL(PPCInterpreter_t* hCPU, uint32 newValue)
{
	hCPU->sprExtended.dmaL = newValue;
	// LC DMA - nothing more to do unless the trigger bit is set
	if ((newValue & 0x2) == 0)
		return;
	uint32 transferLength = (((hCPU->sprExtended.dmaU >> 0) & 0x1F) << 2) | ((newValue >> 2) & 3);
	uint32 memAddr = (hCPU->sprExtended.dmaU) & 0xFFFFFFE0;
	uint32 cacheAddr = (newValue) & 0xFFFFFFE0;
	if (transferLength == 0)
		transferLength = 128;
	transferLength *= 32;
	bool isLoad = ((newValue >> 4) & 1) != 0;
	if ((cacheAddr >> 28) != 0xE)
	{
		// the transfer still runs, using offset 0 inside the cache window
		debug_printf("LCTransfer: Not a cache address\n");
		cacheAddr = 0;
	}
	else
	{
		// turn the address into an offset relative to 0xE0000000
		cacheAddr -= 0xE0000000;
	}
	if (!isLoad)
	{
		// locked cache -> memory (log prints source first, then destination)
		debug_printf("L2->MEM %08x -> %08x size: 0x%x\n", 0xE0000000 + cacheAddr, memAddr, transferLength);
		memcpy(memory_getPointerFromVirtualOffset(memAddr), memory_base + 0xE0000000 + cacheAddr, transferLength);
	}
	else
	{
		// memory -> locked cache (log prints source first, then destination)
		debug_printf("MEM->L2 %08x -> %08x size: 0x%x\n", memAddr, 0xE0000000 + cacheAddr, transferLength);
		memcpy(memory_base + 0xE0000000 + cacheAddr, memory_getPointerFromVirtualOffset(memAddr), transferLength);
	}
	// clear the trigger bit in the stored value
	newValue &= ~2;
	hCPU->sprExtended.dmaL = newValue;
}
// Logs and stores a new value for DBATL[index]. The index is not range checked.
static void setDBATL(PPCInterpreter_t* hCPU, uint32 index, uint32 newValue)
{
	debug_printf("Set DBATL%d to 0x%08x\n", index, newValue);
	auto& extended = hCPU->sprExtended;
	extended.dbatL[index] = newValue;
}
// Logs and stores a new value for DBATU[index]. The index is not range checked.
static void setDBATU(PPCInterpreter_t* hCPU, uint32 index, uint32 newValue)
{
	debug_printf("Set DBATU%d to 0x%08x\n", index, newValue);
	auto& extended = hCPU->sprExtended;
	extended.dbatU[index] = newValue;
}
// Logs and stores a new value for IBATL[index]. The index is not range checked.
static void setIBATL(PPCInterpreter_t* hCPU, uint32 index, uint32 newValue)
{
	debug_printf("Set IBATL%d to 0x%08x\n", index, newValue);
	auto& extended = hCPU->sprExtended;
	extended.ibatL[index] = newValue;
}
// Logs and stores a new value for IBATU[index]. The index is not range checked.
static void setIBATU(PPCInterpreter_t* hCPU, uint32 index, uint32 newValue)
{
	debug_printf("Set IBATU%d to 0x%08x\n", index, newValue);
	auto& extended = hCPU->sprExtended;
	extended.ibatU[index] = newValue;
}
// Returns DBATL[index]. The index is not range checked.
static uint32 getDBATL(PPCInterpreter_t* hCPU, uint32 index)
{
	const auto& extended = hCPU->sprExtended;
	return extended.dbatL[index];
}
// Returns DBATU[index]. The index is not range checked.
static uint32 getDBATU(PPCInterpreter_t* hCPU, uint32 index)
{
	const auto& extended = hCPU->sprExtended;
	return extended.dbatU[index];
}
// Returns IBATL[index]. The index is not range checked.
static uint32 getIBATL(PPCInterpreter_t* hCPU, uint32 index)
{
	const auto& extended = hCPU->sprExtended;
	return extended.ibatL[index];
}
// Returns IBATU[index]. The index is not range checked.
static uint32 getIBATU(PPCInterpreter_t* hCPU, uint32 index)
{
	const auto& extended = hCPU->sprExtended;
	return extended.ibatU[index];
}
// Logs (including current IP and LR) and stores a new value for segment register SR[index].
// The index is not range checked here.
static void setSR(PPCInterpreter_t* hCPU, uint32 index, uint32 newValue)
{
	debug_printf("Set SR%d to 0x%08x IP %08x LR %08x\n", index, newValue, hCPU->instructionPointer, hCPU->spr.LR);
	auto& extended = hCPU->sprExtended;
	extended.sr[index] = newValue;
}
// Returns segment register SR[index]. The index is not range checked here.
static uint32 getSR(PPCInterpreter_t* hCPU, uint32 index)
{
	const auto& extended = hCPU->sprExtended;
	return extended.sr[index];
}
// Logs and stores a new SDR1 value for this core.
static void setSDR1(PPCInterpreter_t* hCPU, uint32 newValue)
{
	debug_printf("Set SDR1 to 0x%08x\n", newValue);
	auto& extended = hCPU->sprExtended;
	extended.sdr1 = newValue;
}
// Write to the lower time base half. Only a write of 0 is expected (anything else asserts);
// regardless of the value, the whole shared time base counter is reset to 0.
static void setTBL(PPCInterpreter_t* hCPU, uint32 newValue)
{
	const bool isReset = (newValue == 0);
	if (!isReset)
		assert_dbg();
	debug_printf("Reset TB\n");
	hCPU->global->tb = 0;
}
// Write to the upper time base half. Only a write of 0 is expected (anything else asserts);
// regardless of the value, the whole shared time base counter is reset to 0.
static void setTBU(PPCInterpreter_t* hCPU, uint32 newValue)
{
	const bool isReset = (newValue == 0);
	if (!isReset)
		assert_dbg();
	debug_printf("Reset TB\n");
	hCPU->global->tb = 0;
}
// Writes newValue to the SPR with number `spr`, with access to the supervisor-level registers.
// Unknown SPR numbers are logged and, outside of public release builds, trigger assert_dbg.
// BAT registers are grouped per kind: ids within a group are 2 apart, so the array index is
// derived from the distance to the first id of the group (entries 4-7 use a second id range).
static void PPCSprSupervisor_set(PPCInterpreter_t* hCPU, uint32 spr, uint32 newValue)
{
	switch (spr)
	{
	case SPR_LR:
		hCPU->spr.LR = newValue;
		break;
	case SPR_CTR:
		hCPU->spr.CTR = newValue;
		break;
	case SPR_DEC:
		setDEC(hCPU, newValue);
		break;
	case SPR_XER:
		PPCInterpreter_setXER(hCPU, newValue);
		break;
	case SPR_UGQR0:
	case SPR_UGQR1:
	case SPR_UGQR2:
	case SPR_UGQR3:
	case SPR_UGQR4:
	case SPR_UGQR5:
	case SPR_UGQR6:
	case SPR_UGQR7:
		hCPU->spr.UGQR[spr - SPR_UGQR0] = newValue;
		break;
	// values above are user mode accessible
	case SPR_TBL_WRITE: // TBL
		setTBL(hCPU, newValue);
		break;
	case SPR_TBU_WRITE: // TBU
		setTBU(hCPU, newValue);
		break;
	case SPR_FPECR:
		setFPECR(hCPU, newValue);
		break;
	case SPR_HID0:
		setHID0(hCPU, newValue);
		break;
	case SPR_HID2:
		setHID2(hCPU, newValue);
		break;
	case SPR_HID4:
		setHID4(hCPU, newValue);
		break;
	case SPR_HID5:
		setHID5(hCPU, newValue);
		break;
	case SPR_L2CR:
		setL2CR(hCPU, newValue);
		break;
	case SPR_SRR0:
		setSRR0(hCPU, newValue);
		break;
	case SPR_SRR1:
		setSRR1(hCPU, newValue);
		break;
	case SPR_SPRG0:
	case SPR_SPRG1:
	case SPR_SPRG2:
	case SPR_SPRG3:
		// SPRG ids are consecutive
		setSPRG(hCPU, spr - SPR_SPRG0, newValue);
		break;
	case SPR_SCR:
		setSCR(hCPU, newValue);
		break;
	case SPR_CAR:
		setCAR(hCPU, newValue);
		break;
	case SPR_BCR:
		setBCR(hCPU, newValue);
		break;
	case SPR_DMAU:
		setDMAU(hCPU, newValue);
		break;
	case SPR_DMAL:
		setDMAL(hCPU, newValue);
		break;
	case SPR_DBATU_0:
	case SPR_DBATU_1:
	case SPR_DBATU_2:
	case SPR_DBATU_3:
		setDBATU(hCPU, (spr - SPR_DBATU_0) / 2, newValue);
		break;
	case SPR_DBATU_4:
	case SPR_DBATU_5:
	case SPR_DBATU_6:
	case SPR_DBATU_7:
		setDBATU(hCPU, 4 + (spr - SPR_DBATU_4) / 2, newValue);
		break;
	case SPR_DBATL_0:
	case SPR_DBATL_1:
	case SPR_DBATL_2:
	case SPR_DBATL_3:
		setDBATL(hCPU, (spr - SPR_DBATL_0) / 2, newValue);
		break;
	case SPR_DBATL_4:
	case SPR_DBATL_5:
	case SPR_DBATL_6:
	case SPR_DBATL_7:
		setDBATL(hCPU, 4 + (spr - SPR_DBATL_4) / 2, newValue);
		break;
	case SPR_IBATU_0:
	case SPR_IBATU_1:
	case SPR_IBATU_2:
	case SPR_IBATU_3:
		setIBATU(hCPU, (spr - SPR_IBATU_0) / 2, newValue);
		break;
	case SPR_IBATU_4:
	case SPR_IBATU_5:
	case SPR_IBATU_6:
	case SPR_IBATU_7:
		setIBATU(hCPU, 4 + (spr - SPR_IBATU_4) / 2, newValue);
		break;
	case SPR_IBATL_0:
	case SPR_IBATL_1:
	case SPR_IBATL_2:
	case SPR_IBATL_3:
		setIBATL(hCPU, (spr - SPR_IBATL_0) / 2, newValue);
		break;
	case SPR_IBATL_4:
	case SPR_IBATL_5:
	case SPR_IBATL_6:
	case SPR_IBATL_7:
		setIBATL(hCPU, 4 + (spr - SPR_IBATL_4) / 2, newValue);
		break;
	case SPR_SDR1:
		setSDR1(hCPU, newValue);
		break;
	// performance monitor registers: writes are only logged
	case 0x3B8: // mmcr0
		debug_printf("Write performance monitor SPR mmcr0 0x%08x", newValue);
		break;
	case 0x3B9: // PMC1
		debug_printf("Write performance monitor SPR PMC1 0x%08x", newValue);
		break;
	case 0x3BA: // PMC2
		debug_printf("Write performance monitor SPR PMC2 0x%08x", newValue);
		break;
	case 0x3BC: // mmcr1
		debug_printf("Write performance monitor SPR mmcr1 0x%08x", newValue);
		break;
	case 0x3BD: // PMC3
		debug_printf("Write performance monitor SPR PMC3 0x%08x", newValue);
		break;
	case 0x3BE: // PMC4
		debug_printf("Write performance monitor SPR PMC4 0x%08x", newValue);
		break;
	default:
		debug_printf("[C%d] Set unhandled SPR 0x%x to %08x (supervisor mode)\n", hCPU->spr.UPIR, spr, newValue);
#ifndef PUBLIC_RELEASE
		assert_dbg();
#endif
		break;
	}
}
// Entry point for SPR writes. When the interpreter configuration allows supervisor mode, every
// write is forwarded to PPCSprSupervisor_set; otherwise only LR, CTR, XER and UGQR0-7 are writable
// and anything else is logged (and asserts outside of public release builds).
static void PPCSpr_set(PPCInterpreter_t* hCPU, uint32 spr, uint32 newValue)
{
	if constexpr(ppcItpCtrl::allowSupervisorMode)
	{
		// todo - check if in supervisor mode or user mode
		PPCSprSupervisor_set(hCPU, spr, newValue);
		return;
	}
	switch (spr)
	{
	case SPR_LR:
	{
		hCPU->spr.LR = newValue;
		break;
	}
	case SPR_CTR:
	{
		hCPU->spr.CTR = newValue;
		break;
	}
	case SPR_XER:
	{
		PPCInterpreter_setXER(hCPU, newValue);
		break;
	}
	case SPR_UGQR0:
	case SPR_UGQR1:
	case SPR_UGQR2:
	case SPR_UGQR3:
	case SPR_UGQR4:
	case SPR_UGQR5:
	case SPR_UGQR6:
	case SPR_UGQR7:
	{
		const uint32 ugqrIndex = spr - SPR_UGQR0;
		hCPU->spr.UGQR[ugqrIndex] = newValue;
		break;
	}
	default:
	{
		debug_printf("[C%d] Set unhandled SPR %d to %08x\n", hCPU->spr.UPIR, spr, newValue);
#ifndef PUBLIC_RELEASE
		assert_dbg();
#endif
		break;
	}
	}
}
// Reads the SPR with number `spr`, with access to the supervisor-level registers.
// Unknown SPR numbers are logged (and assert outside of public release builds) and read as 0.
// BAT registers are grouped per kind: ids within a group are 2 apart, so the array index is
// derived from the distance to the first id of the group (entries 4-7 use a second id range).
static uint32 PPCSprSupervisor_get(PPCInterpreter_t* hCPU, uint32 spr)
{
	switch (spr)
	{
	case SPR_LR:
		return hCPU->spr.LR;
	case SPR_CTR:
		return hCPU->spr.CTR;
	case SPR_XER:
		return PPCInterpreter_getXER(hCPU);
	case SPR_UPIR:
		return hCPU->spr.UPIR;
	case SPR_UGQR0:
	case SPR_UGQR1:
	case SPR_UGQR2:
	case SPR_UGQR3:
	case SPR_UGQR4:
	case SPR_UGQR5:
	case SPR_UGQR6:
	case SPR_UGQR7:
		return hCPU->spr.UGQR[spr - SPR_UGQR0];
	// above are registers accessible in user mode
	case SPR_PVR:
		return getPVR(hCPU);
	case SPR_HID0:
		return getHID0(hCPU);
	case SPR_HID1:
		return getHID1(hCPU);
	case SPR_HID2:
		return getHID2(hCPU);
	case SPR_HID4:
		return getHID4(hCPU);
	case SPR_HID5:
		return getHID5(hCPU);
	case SPR_SCR:
		return getSCR(hCPU);
	case SPR_CAR:
		return getCAR(hCPU);
	case SPR_BCR:
		return getBCR(hCPU);
	case SPR_DAR:
		return getDAR(hCPU);
	case SPR_DSISR:
		return getDSISR(hCPU);
	case SPR_L2CR:
		return getL2CR(hCPU);
	case SPR_FPECR:
		return getFPECR(hCPU);
	case SPR_SPRG0:
	case SPR_SPRG1:
	case SPR_SPRG2:
	case SPR_SPRG3:
		// SPRG ids are consecutive
		return getSPRG(hCPU, spr - SPR_SPRG0);
	case SPR_DBATU_0:
	case SPR_DBATU_1:
	case SPR_DBATU_2:
	case SPR_DBATU_3:
		return getDBATU(hCPU, (spr - SPR_DBATU_0) / 2);
	case SPR_DBATU_4:
	case SPR_DBATU_5:
	case SPR_DBATU_6:
	case SPR_DBATU_7:
		return getDBATU(hCPU, 4 + (spr - SPR_DBATU_4) / 2);
	case SPR_DBATL_0:
	case SPR_DBATL_1:
	case SPR_DBATL_2:
	case SPR_DBATL_3:
		return getDBATL(hCPU, (spr - SPR_DBATL_0) / 2);
	case SPR_DBATL_4:
	case SPR_DBATL_5:
	case SPR_DBATL_6:
	case SPR_DBATL_7:
		return getDBATL(hCPU, 4 + (spr - SPR_DBATL_4) / 2);
	case SPR_IBATU_0:
	case SPR_IBATU_1:
	case SPR_IBATU_2:
	case SPR_IBATU_3:
		return getIBATU(hCPU, (spr - SPR_IBATU_0) / 2);
	case SPR_IBATU_4:
	case SPR_IBATU_5:
	case SPR_IBATU_6:
	case SPR_IBATU_7:
		return getIBATU(hCPU, 4 + (spr - SPR_IBATU_4) / 2);
	case SPR_IBATL_0:
	case SPR_IBATL_1:
	case SPR_IBATL_2:
	case SPR_IBATL_3:
		return getIBATL(hCPU, (spr - SPR_IBATL_0) / 2);
	case SPR_IBATL_4:
	case SPR_IBATL_5:
	case SPR_IBATL_6:
	case SPR_IBATL_7:
		return getIBATL(hCPU, 4 + (spr - SPR_IBATL_4) / 2);
	default:
		debug_printf("[C%d] Get unhandled SPR %d\n", hCPU->spr.UPIR, spr);
#ifndef PUBLIC_RELEASE
		assert_dbg();
#endif
		break;
	}
	// unhandled registers read as zero
	return 0;
}
// Entry point for SPR reads. When the interpreter configuration allows supervisor mode, every
// read is forwarded to PPCSprSupervisor_get; otherwise only a small set of registers is handled:
// LR, CTR, XER, DEC, UPIR, PVR and UGQR0-7. Anything else is logged (and asserts outside of
// public release builds) and reads as 0.
static uint32 PPCSpr_get(PPCInterpreter_t* hCPU, uint32 spr)
{
	if constexpr(ppcItpCtrl::allowSupervisorMode)
	{
		// todo - check if in supervisor mode or user mode
		return PPCSprSupervisor_get(hCPU, spr);
	}
	switch (spr)
	{
	case SPR_LR:
		return hCPU->spr.LR;
	case SPR_CTR:
		return hCPU->spr.CTR;
	case SPR_XER:
		return PPCInterpreter_getXER(hCPU);
	case SPR_DEC:
	{
		// special handling for DEC register: remaining cycles of the decrementer, clamped at 0
		assert_dbg();
		uint64 elapsedCycles = PPCInterpreter_getMainCoreCycleCounter() - ppcMainThreadDECCycleStart;
		if (elapsedCycles >= (uint64)ppcMainThreadDECCycleValue)
			return 0;
		return (uint32)(ppcMainThreadDECCycleValue - elapsedCycles);
	}
	case SPR_UPIR:
		return hCPU->spr.UPIR;
	case SPR_PVR:
		// not expected in this mode; reads as 0 after the assert
		assert_dbg();
		return 0;
	case SPR_UGQR0:
	case SPR_UGQR1:
	case SPR_UGQR2:
	case SPR_UGQR3:
	case SPR_UGQR4:
	case SPR_UGQR5:
	case SPR_UGQR6:
	case SPR_UGQR7:
		return hCPU->spr.UGQR[spr - SPR_UGQR0];
	default:
		debug_printf("[C%d] Get unhandled SPR %d\n", hCPU->spr.UPIR, spr);
#ifndef PUBLIC_RELEASE
		assert_dbg();
#endif
		break;
	}
	// unhandled registers read as zero
	return 0;
}

View file

@ -0,0 +1,75 @@
#pragma once
#include "PPCState.h"
// Running argument counters used while PPCCoreCallback marshals its arguments into registers.
struct PPCCoreCallbackData_t
{
	sint32 gprCount{ 0 }; // number of general purpose argument registers filled so far (starting at r3)
	sint32 floatCount{ 0 }; // number of floating point argument registers filled so far (starting at f1)
};
// callback functions
// Terminal overload: all arguments have been placed in registers, so run the guest function
// and hand back whatever it left in r3.
inline uint32 PPCCoreCallback(MPTR function, const PPCCoreCallbackData_t& data)
{
	PPCInterpreter_t* cpuAfterCall = PPCCore_executeCallbackInternal(function);
	return cpuAfterCall->gpr[3];
}
// Recursive overload: places `currentArg` into the next free argument register(s) of the current
// interpreter instance, then recurses with the remaining arguments. Mapping by argument type:
//  - raw pointer / MEMPTR     -> guest address (MPTR) in the next GPR
//  - enum                     -> re-dispatched as its underlying integer type
//  - floating point           -> next FPR (starting at f1), stored as double
//  - 64-bit integer           -> two consecutive GPRs, high word first
//  - everything else          -> truncated to 32 bits in the next GPR
// Returns gpr[3] after the guest function has run (see the terminal overload).
template <typename T, typename... TArgs>
uint32 PPCCoreCallback(MPTR function, PPCCoreCallbackData_t& data, T currentArg, TArgs... args)
{
	cemu_assert_debug(data.gprCount <= 8);
	cemu_assert_debug(data.floatCount <= 8);
	if constexpr (std::is_pointer_v<T>)
	{
		ppcInterpreterCurrentInstance->gpr[3 + data.gprCount] = MEMPTR(currentArg).GetMPTR();
		data.gprCount++;
	}
	else if constexpr (std::is_base_of_v<MEMPTRBase, std::remove_reference_t<T>>)
	{
		ppcInterpreterCurrentInstance->gpr[3 + data.gprCount] = currentArg.GetMPTR();
		data.gprCount++;
	}
	else if constexpr (std::is_reference_v<T>)
	{
		ppcInterpreterCurrentInstance->gpr[3 + data.gprCount] = MEMPTR(&currentArg).GetMPTR();
		data.gprCount++;
	}
	else if constexpr(std::is_enum_v<T>)
	{
		using TEnum = typename std::underlying_type<T>::type;
		// forward the remaining pack as-is; std::forward without an explicit template argument
		// cannot deduce its type and would not compile once an enum argument is actually passed
		return PPCCoreCallback<TEnum>(function, data, (TEnum)currentArg, args...);
	}
	else if constexpr (std::is_floating_point_v<T>)
	{
		ppcInterpreterCurrentInstance->fpr[1 + data.floatCount].fpr = (double)currentArg;
		data.floatCount++;
	}
	else if constexpr (std::is_integral_v<T> && sizeof(T) == sizeof(uint64))
	{
		ppcInterpreterCurrentInstance->gpr[3 + data.gprCount] = (uint32)(currentArg >> 32); // high
		ppcInterpreterCurrentInstance->gpr[3 + data.gprCount + 1] = (uint32)currentArg; // low
		data.gprCount += 2;
	}
	else
	{
		ppcInterpreterCurrentInstance->gpr[3 + data.gprCount] = (uint32)currentArg;
		data.gprCount++;
	}
	// continue with the remaining arguments (or the terminal overload when none are left)
	return PPCCoreCallback(function, data, args...);
}
// Public entry point: call the guest function at address `function` with the given arguments.
// Starts with fresh register counters and returns the guest function's r3.
template <typename... TArgs>
uint32 PPCCoreCallback(MPTR function, TArgs... args)
{
	PPCCoreCallbackData_t argCounters{};
	return PPCCoreCallback(function, argCounters, std::forward<TArgs>(args)...);
}
// Public entry point taking a host pointer: the pointer is converted to a guest address through
// MEMPTR before the call. Returns the guest function's r3.
template <typename... TArgs>
uint32 PPCCoreCallback(void* functionPtr, TArgs... args)
{
	MEMPTR<void> guestFunction{ functionPtr };
	PPCCoreCallbackData_t argCounters{};
	return PPCCoreCallback(guestFunction.GetMPTR(), argCounters, std::forward<TArgs>(args)...);
}

View file

@ -0,0 +1,115 @@
#include "Cafe/OS/libs/gx2/GX2.h"
#include "Cafe/HW/Latte/Core/Latte.h"
#include "Cafe/OS/libs/coreinit/coreinit_Alarm.h"
#include "Cafe/OS/libs/coreinit/coreinit_Thread.h"
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
#include "Cafe/HW/Espresso/Recompiler/PPCRecompiler.h"
#include "Cafe/CafeSystem.h"
// Cycle budget assigned to hCPU->remainingCycles at the start of each execution slice.
uint32 ppcThreadQuantum = 45000; // execute 45000 instructions before thread reschedule happens, this value can be overwritten by game profiles
void PPCInterpreter_relinquishTimeslice()
{
if( ppcInterpreterCurrentInstance->remainingCycles >= 0 )
{
ppcInterpreterCurrentInstance->skippedCycles = ppcInterpreterCurrentInstance->remainingCycles + 1;
ppcInterpreterCurrentInstance->remainingCycles = -1;
}
}
// Adds numCycles to the remaining cycle budget of the current interpreter instance.
void PPCCore_boostQuantum(sint32 numCycles)
{
	PPCInterpreter_t* const cpu = ppcInterpreterCurrentInstance;
	cpu->remainingCycles = cpu->remainingCycles + numCycles;
}
// Removes numCycles from the remaining cycle budget of the current interpreter instance.
void PPCCore_deboostQuantum(sint32 numCycles)
{
	PPCInterpreter_t* const cpu = ppcInterpreterCurrentInstance;
	cpu->remainingCycles = cpu->remainingCycles - numCycles;
}
// Forward declaration of the coreinit routine that performs the actual thread switch.
// It is called by PPCCore_switchToScheduler() and PPCCore_switchToSchedulerWithLock().
namespace coreinit
{
void __OSThreadSwitchToNext();
}
// Switches to the next thread via the coreinit scheduler.
// The caller must NOT hold the scheduler lock; it is taken here for the duration of the switch.
void PPCCore_switchToScheduler()
{
	// the scheduler lock must not be held past the end of a thread's time slice
	cemu_assert_debug(!__OSHasSchedulerLock());
	// debug check on the interrupt mask (one specific title id is exempted)
	cemu_assert_debug(ppcInterpreterCurrentInstance->coreInterruptMask != 0 || CafeSystem::GetForegroundTitleId() == 0x000500001019e600);

	__OSLockScheduler();
	coreinit::__OSThreadSwitchToNext();
	__OSUnlockScheduler();
}
// Switches to the next thread via the coreinit scheduler.
// Unlike PPCCore_switchToScheduler(), the caller must already hold the scheduler lock.
void PPCCore_switchToSchedulerWithLock()
{
	// the scheduler lock must be held by the caller
	cemu_assert_debug(__OSHasSchedulerLock());
	// debug check on the interrupt mask (one specific title id is exempted)
	cemu_assert_debug(ppcInterpreterCurrentInstance->coreInterruptMask != 0 || CafeSystem::GetForegroundTitleId() == 0x000500001019e600);

	coreinit::__OSThreadSwitchToNext();
}
// Return target installed in LR by PPCCore_executeCallbackInternal().
// Clears the instruction pointer (the executor treats IP == 0 as "callback returned")
// and gives up the rest of the timeslice so the interpreter loop exits.
void _PPCCore_callbackExit(PPCInterpreter_t* hCPU)
{
	hCPU->instructionPointer = 0;
	PPCInterpreter_relinquishTimeslice();
}
// Runs the guest function at functionMPTR on the current interpreter instance and
// returns once that function has returned.
// LR is pointed at _PPCCore_callbackExit, which zeroes the instruction pointer; that
// is the signal used below to detect the return. LR, the instruction pointer and the
// stack pointer (r1) are restored before returning.
// Returns the interpreter instance the callback ran on.
PPCInterpreter_t* PPCCore_executeCallbackInternal(uint32 functionMPTR)
{
	cemu_assert_debug(functionMPTR != 0);
	PPCInterpreter_t* const cpu = ppcInterpreterCurrentInstance;
	// state that has to survive the call
	const uint32 savedLR = cpu->spr.LR;
	const uint32 savedIP = cpu->instructionPointer;
	// reserve a save area on the guest stack
	constexpr uint32 kSaveAreaSize = 16 * 4;
	cpu->gpr[1] -= kSaveAreaSize;
	// returning from the callback lands in _PPCCore_callbackExit
	cpu->spr.LR = PPCInterpreter_makeCallableExportDepr(_PPCCore_callbackExit);
	cpu->instructionPointer = functionMPTR;
	// run timeslices until the callback has returned
	for (;;)
	{
		cpu->remainingCycles = ppcThreadQuantum;
		cpu->skippedCycles = 0;
		if (cpu->remainingCycles > 0)
		{
			// give the recompiler the first chance, then finish the slice in the interpreter
			PPCRecompiler_attemptEnter(cpu, cpu->instructionPointer);
			while (--cpu->remainingCycles >= 0)
				PPCInterpreterSlim_executeInstruction(cpu);
		}
		if (cpu->instructionPointer != 0)
		{
			// callback still running, let other threads execute before continuing
			coreinit::OSYieldThread();
			continue;
		}
		// callback returned: give back the cycles skipped by the early exit
		cpu->remainingCycles += cpu->skippedCycles;
		cpu->skippedCycles = 0;
		break;
	}
	// release the save area
	cpu->gpr[1] += kSaveAreaSize;
	// restore LR and instruction pointer
	cpu->spr.LR = savedLR;
	cpu->instructionPointer = savedIP;
	return cpu;
}
// Exported wrapper around PPCCore_executeCallbackInternal() that discards the
// returned interpreter instance.
__declspec(dllexport) void PPCCore_executeCallback(uint32 functionMPTR)
{
	(void)PPCCore_executeCallbackInternal(functionMPTR);
}
// Not implemented: the body only triggers the debug assertion.
void PPCCore_deleteAllThreads()
{
assert_dbg();
}
// Initialization hook for the PPC core; currently does nothing.
void PPCCore_init()
{
}

View file

@ -0,0 +1,245 @@
// Per-process context of the LLE scheduler: a reserved padding region followed by
// the interpreter state of the three emulated cores.
struct PPCInterpreterLLEContext_t
{
uint8 padding[1024 * 128]; // reserved memory for stack (for recompiler mode)
PPCInterpreter_t cores[3]; // one entry per core, indexed by core index (0..2)
};
PPCInterpreterGlobal_t globalCPUState = { 0 };
// Prepares one LLE core: attaches the shared global CPU state and stores the core
// index in the UPIR register.
void PPCCoreLLE_initCore(PPCInterpreter_t* hCPU, uint32 coreIndex)
{
	hCPU->global = &globalCPUState;
	hCPU->spr.UPIR = coreIndex;
}
#define SCR_C2 (0x200000) // enable core 2
#define SCR_C1 (0x400000) // enable core 1
// Boot parameter block placed in guest physical memory (see PPCCoreLLE_setupBootParamBlock).
// The layout must match what the guest kernel expects; the static_asserts below pin the
// offsets of the fields this file relies on. Fields named uknXXX have unknown meaning,
// the suffix is their byte offset within the block.
typedef struct
{
uint32be ukn000;
uint32be ukn004;
uint32be ukn008;
uint32be ukn00C;
uint32be ukn010;
uint32be ukn014;
uint32be busFreq; // +0x18, set to ESPRESSO_BUS_CLOCK
uint32be ukn01C;
uint32be ukn020[4];
uint32be ukn030[4];
uint32be ukn040[4];
uint32be ukn050[4];
uint32be ukn060[4];
uint32be ukn070[4];
uint32be ukn080[4];
uint32be ukn090[4];
uint32be ukn0A0[4];
uint32be ukn0B0[4];
uint32be ukn0C0;
// +0xC4, description of the three RAM regions
struct
{
uint32be id;
uint32be baseAddress;
uint32be size;
}ramInfo[3];
uint32 ukn0E8;
uint32 ukn0EC;
uint32 ukn0F0[4];
uint32 ukn100[8];
uint32 ukn120[8];
uint32 ukn140[8];
uint32 ukn160[8];
uint32 ukn180[8];
uint32 ukn1A0[8];
uint32 ukn1C0[8];
uint32 ukn1E0[8];
uint32 ukn200[8];
uint32 ukn220[8];
uint32 ukn240[8];
uint32 ukn260[8];
uint32 ukn280[8];
uint32 ukn2A0[8];
uint32 ukn2C0[8];
uint32 ukn2E0[8];
uint32 ukn300[8];
uint32 ukn320[8];
uint32 ukn340[8];
uint32 ukn360[8];
uint32 ukn380[8];
uint32be ukn3A0;
uint32be ukn3A4;
uint32be ukn3A8;
uint32be ukn3AC;
uint32be ukn3B0;
uint32be smdpAreaPtr; // +0x3B4, physical address of the smdp area (polled by smdpArea_thread)
uint32be ukn3B8;
uint32be ukn3BC;
uint32 ukn3C0[8];
uint32 ukn3E0[8];
uint32 ukn400;
uint32 ukn404;
uint32 ukn408;
}ppcBootParamBlock_t; // for kernel 5.5.2
// compile-time layout checks
static_assert(offsetof(ppcBootParamBlock_t, ramInfo) == 0xC4, "");
static_assert(offsetof(ppcBootParamBlock_t, busFreq) == 0x18, "");
static_assert(offsetof(ppcBootParamBlock_t, smdpAreaPtr) == 0x3B4, "");
static_assert(offsetof(ppcBootParamBlock_t, ukn400) == 0x400, "");
void PPCCoreLLE_setupBootParamBlock()
{
ppcBootParamBlock_t* bootParamBlock = (ppcBootParamBlock_t*)memory_getPointerFromPhysicalOffset(0x01FFF000);
memset(bootParamBlock, 0, sizeof(ppcBootParamBlock_t));
// setup RAM info
//PPCBaseAddress 0x8000000 0x00000000 0x28000000
//PPCSize 0x120000 0x2000000 0xA8000000
bootParamBlock->ukn004 = 0x40C;
bootParamBlock->busFreq = ESPRESSO_BUS_CLOCK;
bootParamBlock->ramInfo[0].id = 0;
bootParamBlock->ramInfo[0].baseAddress = 0x8000000;
bootParamBlock->ramInfo[0].size = 0x120000;
bootParamBlock->ramInfo[1].id = 1;
bootParamBlock->ramInfo[1].baseAddress = 0x00000000;
bootParamBlock->ramInfo[1].size = 0x2000000;
bootParamBlock->ramInfo[2].id = 2;
bootParamBlock->ramInfo[2].baseAddress = 0x28000000;
bootParamBlock->ramInfo[2].size = 0xA8000000;
}
// Shared-memory area holding two rings of guest pointers: one for commands and one
// for results. Each index lives at its own 0x40-byte aligned offset (enforced by the
// padding arrays and checked by the static_asserts below).
typedef struct
{
uint32be magic; // compared against 'smdp' before the area is used
uint32be count; // ring size; the indices wrap around modulo this value
uint32 _padding08[14];
/* +0x0040 */ uint32be commandsReadIndex; // written by IOSU
uint32 _padding44[15];
/* +0x0080 */ uint32be commandsWriteIndex;
uint32 _padding84[15];
/* +0x00C0 */ uint32be resultsReadIndex;
uint32 _paddingC4[15];
/* +0x0100 */ uint32be resultsWriteIndex; // written by IOSU
uint32 _padding104[15];
/* +0x0140 */ uint32be commandPtrs[0xC00];
/* +0x3140 */ uint32be resultPtrs[0xC00];
}smdpArea_t;
// compile-time layout checks
static_assert(offsetof(smdpArea_t, commandsReadIndex) == 0x0040, "");
static_assert(offsetof(smdpArea_t, commandsWriteIndex) == 0x0080, "");
static_assert(offsetof(smdpArea_t, resultsReadIndex) == 0x00C0, "");
static_assert(offsetof(smdpArea_t, resultsWriteIndex) == 0x0100, "");
static_assert(offsetof(smdpArea_t, resultPtrs) == 0x3140, "");
// A single SMDP command as referenced by smdpArea_t::commandPtrs.
// Only 'type' and 'ukn08' are accessed in this file (see smdpArea_processCommand);
// the meaning of the other fields is unknown, the suffix is their byte offset.
typedef struct
{
uint32be type; // command type; only type 1 is handled
uint32be ukn04;
uint32be ukn08; // set to 1 when a type 1 command is processed
uint32be ukn0C;
uint32be ukn10;
uint32be ukn14;
uint32be ukn18;
uint32be ukn1C;
uint32be ukn20;
uint32be ukn24;
uint32be ukn28;
uint32be ukn2C;
}smdpCommand_t;
// Appends 'result' to the result ring of the smdp area and advances the write index
// (wrapping at smdpArea->count).
// The ring state is located in guest memory, so it is validated first: a count of zero
// would cause a division by zero and a write index beyond the array would write outside
// of resultPtrs. In either case the result is dropped and a debug message is logged.
void smdpArea_pushResult(smdpArea_t* smdpArea, MPTR result)
{
	const uint32 ringSize = (uint32)smdpArea->count;
	const uint32 writeIndex = (uint32)smdpArea->resultsWriteIndex;
	const uint32 ringCapacity = (uint32)(sizeof(smdpArea->resultPtrs) / sizeof(smdpArea->resultPtrs[0]));
	if (ringSize == 0 || writeIndex >= ringCapacity)
	{
		forceLogDebug_printf("SMDP result ring is in an invalid state - result dropped");
		return;
	}
	smdpArea->resultPtrs[writeIndex] = result;
	smdpArea->resultsWriteIndex = (writeIndex + 1) % ringSize;
}
// Handles one command taken from the smdp command ring.
// Only command type 1 is supported: it is marked via ukn08 and its address is pushed
// to the result ring. Any other type triggers the debug assertion.
void smdpArea_processCommand(smdpArea_t* smdpArea, smdpCommand_t* cmd)
{
	if (cmd->type != 1)
	{
		// unknown command type
		assert_dbg();
		return;
	}
	cmd->ukn08 = 1;
	// cmd->ukn2C ?
	forceLogDebug_printf("SMDP command received - todo");
	const auto cmdAddress = memory_getVirtualOffsetFromPointer(cmd);
	smdpArea_pushResult(smdpArea, cmdAddress);
}
void smdpArea_thread()
{
while (true)
{
ppcBootParamBlock_t* bootParamBlock = (ppcBootParamBlock_t*)memory_getPointerFromPhysicalOffset(0x01FFF000);
if(bootParamBlock->smdpAreaPtr != MPTR_NULL)
{
smdpArea_t* smdpArea = (smdpArea_t*)memory_getPointerFromPhysicalOffset(bootParamBlock->smdpAreaPtr);
if (smdpArea->magic == 'smdp')
{
uint32 cmdReadIndex = smdpArea->commandsReadIndex;
uint32 cmdWriteIndex = smdpArea->commandsWriteIndex;
if (cmdReadIndex != cmdWriteIndex)
{
// new command
smdpArea_processCommand(smdpArea, (smdpCommand_t*)memory_getPointerFromPhysicalOffset(smdpArea->commandPtrs[cmdReadIndex]));
// increment read counter
cmdReadIndex = (cmdReadIndex + 1) % (uint32)smdpArea->count;
smdpArea->commandsReadIndex = cmdReadIndex;
}
}
}
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
}
// Starts the LLE scheduler on the calling thread. Never returns.
// All three cores are emulated round-robin on this one thread, 10000 cycles at a time.
// Core 0 starts at 'entrypoint'; cores 1 and 2 start at 0xFFF00100 and are only
// executed while their enable bit in the global SCR register is set.
void PPCCoreLLE_startSingleCoreScheduler(uint32 entrypoint)
{
	auto* const cpuContext = (PPCInterpreterLLEContext_t*)malloc(sizeof(PPCInterpreterLLEContext_t));
	memset(cpuContext, 0, sizeof(PPCInterpreterLLEContext_t));
	PPCCoreLLE_setupBootParamBlock();
	for (uint32 i = 0; i < 3; i++)
		PPCCoreLLE_initCore(cpuContext->cores + i, i);
	cpuContext->cores[0].instructionPointer = entrypoint;
	cpuContext->cores[1].instructionPointer = 0xFFF00100;
	cpuContext->cores[2].instructionPointer = 0xFFF00100;
	// todo - calculate instruction pointer when core 1/2 is enabled (because entry point is determined by MSR exception vector bit)
	std::thread(smdpArea_thread).detach();
	while (true)
	{
		for (uint32 coreIndex = 0; coreIndex < 3; coreIndex++)
		{
			PPCInterpreter_t* hCPU = cpuContext->cores + coreIndex;
			ppcInterpreterCurrentInstance = hCPU;
			// core 0 always runs, cores 1 and 2 require their SCR enable bit
			uint32 requiredScrBit = 0;
			if (coreIndex == 1)
				requiredScrBit = SCR_C1;
			else if (coreIndex == 2)
				requiredScrBit = SCR_C2;
			if (requiredScrBit != 0 && (globalCPUState.sprGlobal.scr & requiredScrBit) == 0)
				continue;
			// run one slice on this core
			hCPU->remainingCycles = 10000;
			while (--hCPU->remainingCycles >= 0)
				PPCInterpreterFull_executeInstruction(hCPU);
		}
	}
	assert_dbg();
}

View file

@ -0,0 +1,255 @@
#pragma once
#include "Cafe/HW/MMU/MMU.h"
// Identifiers for CPU exception kinds (unscoped enum, values 0..3 in declaration order).
enum
{
CPUException_NOTHING,
CPUException_FPUUNAVAILABLE,
CPUException_EXTERNAL,
CPUException_SYSTEMCALL
};
#define PPC_LWARX_RESERVATION_MAX (4)
// One floating point register. The union offers several views of the same 16 bytes:
// - fpr / fp0 : first 8 bytes as double
// - fp1       : second 8 bytes as double
// - guint / fp0int : first 8 bytes as raw 64-bit integer
// - fp1int    : second 8 bytes as raw 64-bit integer
// NOTE(review): fp0/fp1 presumably hold the two paired-single slots - confirm against the interpreter.
union FPR_t
{
double fpr;
struct
{
double fp0;
double fp1;
};
struct
{
uint64 guint;
};
struct
{
uint64 fp0int;
uint64 fp1int;
};
};
// CPU state that is shared between cores; every PPCInterpreter_t points to one
// instance through its 'global' member.
typedef struct
{
struct
{
uint32 scr; // bits SCR_C1 / SCR_C2 of this register enable core 1 / core 2 in the LLE scheduler
uint32 car;
//uint32 bcr;
}sprGlobal;
uint64 tb;
}PPCInterpreterGlobal_t;
// Complete state of one emulated PowerPC core / thread context.
// The layout of the first part of this struct is relied upon by external code (see the
// note in the middle of the struct), so members must not be reordered or resized.
struct PPCInterpreter_t
{
uint32 instructionPointer; // address of the next instruction; 0 is used as "callback returned" marker by PPCCore_executeCallbackInternal
uint32 gpr[32]; // general purpose registers (gpr[1] is the stack pointer, see GPR_SP)
FPR_t fpr[32]; // floating point registers
uint32 fpscr;
uint8 cr[32]; // 0 -> bit not set, 1 -> bit set (upper 7 bits of each byte must always be zero) (cr0 starts at index 0, cr1 at index 4 ..)
uint8 xer_ca; // carry from xer
uint8 LSQE;
uint8 PSE;
// thread remaining cycles
sint32 remainingCycles; // if this value goes below zero, the next thread is scheduled
sint32 skippedCycles; // number of skipped cycles
struct
{
uint32 LR;
uint32 CTR;
uint32 XER;
uint32 UPIR; // holds the core index (set in PPCCoreLLE_initCore)
uint32 UGQR[8];
}spr;
// LWARX and STWCX
uint32 reservedMemAddr;
uint32 reservedMemValue;
/* Note: Everything above is potentially hardcoded into Cemuhook. Do not touch anything or it will risk breaking compatibility */
// temporary storage for recompiler
FPR_t temporaryFPR[8];
uint32 temporaryGPR[4];
// values below this are not used by Cafe OS usermode
struct
{
uint32 fpecr; // is this the same register as fpscr ?
uint32 DEC;
uint32 srr0;
uint32 srr1;
uint32 PVR;
uint32 msr;
uint32 sprg[4];
// DSI/ISI
uint32 dar;
uint32 dsisr;
// DMA
uint32 dmaU;
uint32 dmaL;
// MMU
uint32 dbatU[8];
uint32 dbatL[8];
uint32 ibatU[8];
uint32 ibatL[8];
uint32 sr[16];
uint32 sdr1;
}sprExtended;
// global CPU values
PPCInterpreterGlobal_t* global;
// interpreter control
bool memoryException;
// core context (starts at 0xFFFFFF00?)
/* 0xFFFFFFE4 */ uint32 coreInterruptMask;
// extra variables for recompiler
void* rspTemp;
};
// parameter access (legacy C style)
// Returns the 32-bit call parameter with the given index.
// Parameters 0..7 are taken from gpr[3]..gpr[10]; higher indices are read from
// guest memory at gpr[1] + 8 + (index - 8) * 4.
static uint32 PPCInterpreter_getCallParamU32(PPCInterpreter_t* hCPU, uint32 index)
{
	if (index < 8)
		return hCPU->gpr[3 + index];
	const uint32 stackSlot = index - 8;
	return memory_readU32(hCPU->gpr[1] + 8 + stackSlot * 4);
}
// Returns a 64-bit call parameter assembled from two consecutive 32-bit parameters:
// 'index' supplies the upper 32 bits and 'index + 1' the lower 32 bits.
static uint64 PPCInterpreter_getCallParamU64(PPCInterpreter_t* hCPU, uint32 index)
{
	const uint64 upperHalf = (uint64)PPCInterpreter_getCallParamU32(hCPU, index);
	const uint64 lowerHalf = (uint64)PPCInterpreter_getCallParamU32(hCPU, index + 1);
	return (upperHalf << 32ULL) | lowerHalf;
}
// Expression macros that read a call parameter. All of them expect a variable named
// 'hCPU' (PPCInterpreter_t*) to be in scope at the point of use.
#define ppcGetCallParamU32(__index) PPCInterpreter_getCallParamU32(hCPU, __index)
#define ppcGetCallParamU16(__index) ((uint16)(PPCInterpreter_getCallParamU32(hCPU, __index)&0xFFFF))
#define ppcGetCallParamU8(__index) ((uint8)(PPCInterpreter_getCallParamU32(hCPU, __index)&0xFF))
// interprets the parameter as guest address and translates it to a host pointer (null is allowed)
#define ppcGetCallParamStruct(__index, __type) ((__type*)memory_getPointerFromVirtualOffsetAllowNull(PPCInterpreter_getCallParamU32(hCPU, __index)))
// legacy way of accessing parameters
// Each macro below declares a local variable '__name' initialized from parameter '__index'.
// They also require 'hCPU' to be in scope.
#define ppcDefineParamU32(__name, __index) uint32 __name = PPCInterpreter_getCallParamU32(hCPU, __index)
#define ppcDefineParamU16(__name, __index) uint16 __name = (uint16)PPCInterpreter_getCallParamU32(hCPU, __index)
#define ppcDefineParamU32BEPtr(__name, __index) uint32be* __name = (uint32be*)((uint8*)memory_getPointerFromVirtualOffsetAllowNull(PPCInterpreter_getCallParamU32(hCPU, __index)))
#define ppcDefineParamS32(__name, __index) sint32 __name = (sint32)PPCInterpreter_getCallParamU32(hCPU, __index)
// 64-bit values occupy two consecutive parameter slots (__index and __index+1)
#define ppcDefineParamU64(__name, __index) uint64 __name = PPCInterpreter_getCallParamU64(hCPU, __index)
#define ppcDefineParamMPTR(__name, __index) MPTR __name = (MPTR)PPCInterpreter_getCallParamU32(hCPU, __index)
#define ppcDefineParamMEMPTR(__name, __type, __index) MEMPTR<__type> __name{PPCInterpreter_getCallParamU32(hCPU, __index)}
#define ppcDefineParamU8(__name, __index) uint8 __name = (PPCInterpreter_getCallParamU32(hCPU, __index)&0xFF)
// pointer variants: the parameter is a guest address which is translated to a host pointer (null is allowed)
#define ppcDefineParamStructPtr(__name, __type, __index) __type* __name = ((__type*)memory_getPointerFromVirtualOffsetAllowNull(PPCInterpreter_getCallParamU32(hCPU, __index)))
#define ppcDefineParamTypePtr(__name, __type, __index) __type* __name = ((__type*)memory_getPointerFromVirtualOffsetAllowNull(PPCInterpreter_getCallParamU32(hCPU, __index)))
#define ppcDefineParamPtr(__name, __type, __index) __type* __name = ((__type*)memory_getPointerFromVirtualOffsetAllowNull(PPCInterpreter_getCallParamU32(hCPU, __index)))
#define ppcDefineParamStr(__name, __index) char* __name = ((char*)memory_getPointerFromVirtualOffsetAllowNull(PPCInterpreter_getCallParamU32(hCPU, __index)))
#define ppcDefineParamUStr(__name, __index) uint8* __name = ((uint8*)memory_getPointerFromVirtualOffsetAllowNull(PPCInterpreter_getCallParamU32(hCPU, __index)))
#define ppcDefineParamWStr(__name, __index) wchar_t* __name = ((wchar_t*)memory_getPointerFromVirtualOffsetAllowNull(PPCInterpreter_getCallParamU32(hCPU, __index)))
#define ppcDefineParamWStrBE(__name, __index) uint16be* __name = ((uint16be*)memory_getPointerFromVirtualOffsetAllowNull(PPCInterpreter_getCallParamU32(hCPU, __index)))
// GPR constants
#define GPR_SP 1 // index of the stack pointer register within PPCInterpreter_t::gpr
// interpreter functions
PPCInterpreter_t* PPCInterpreter_createInstance(unsigned int Entrypoint);
PPCInterpreter_t* PPCInterpreter_getCurrentInstance();
uint64 PPCInterpreter_getMainCoreCycleCounter();
void PPCInterpreter_nextInstruction(PPCInterpreter_t* cpuInterpreter);
void PPCInterpreter_jumpToInstruction(PPCInterpreter_t* cpuInterpreter, uint32 newIP);
// single-instruction executors: the Slim variant is used by the HLE callback executor,
// the Full variant by the LLE scheduler
void PPCInterpreterSlim_executeInstruction(PPCInterpreter_t* hCPU);
void PPCInterpreterFull_executeInstruction(PPCInterpreter_t* hCPU);
// misc
uint32 PPCInterpreter_getXER(PPCInterpreter_t* hCPU);
void PPCInterpreter_setXER(PPCInterpreter_t* hCPU, uint32 v);
// Wii U clocks (deprecated. Moved to Espresso/Const.h)
// all values are in Hz
#define ESPRESSO_CORE_CLOCK 1243125000
#define ESPRESSO_BUS_CLOCK 248625000
#define ESPRESSO_TIMER_CLOCK (ESPRESSO_BUS_CLOCK/4) // 62156250
// core clock / 20 == timer clock (1243125000 / 20 = 62156250)
#define ESPRESSO_CORE_CLOCK_TO_TIMER_CLOCK(__cc) ((__cc)/20ULL)
// interrupt vectors
#define CPU_EXCEPTION_DSI 0x00000300
#define CPU_EXCEPTION_INTERRUPT 0x00000500 // todo: validate
#define CPU_EXCEPTION_FPUUNAVAIL 0x00000800 // todo: validate
#define CPU_EXCEPTION_SYSTEMCALL 0x00000C00 // todo: validate
#define CPU_EXCEPTION_DECREMENTER 0x00000900 // todo: validate
// FPU available check
//#define FPUCheckAvailable() if ((hCPU->msr & MSR_FP) == 0) { IPTException(hCPU, CPU_EXCEPTION_FPUUNAVAIL); return; }
#define FPUCheckAvailable() // since the emulated code always runs in usermode we can assume that MSR_FP is always set
// spr
void PPCSpr_set(PPCInterpreter_t* hCPU, uint32 spr, uint32 newValue);
uint32 PPCSpr_get(PPCInterpreter_t* hCPU, uint32 spr);
uint32 PPCInterpreter_getCoreIndex(PPCInterpreter_t* hCPU);
uint32 PPCInterpreter_getCurrentCoreIndex();
// decrement register
void PPCInterpreter_setDEC(PPCInterpreter_t* hCPU, uint32 newValue);
// timing for main processor
extern volatile uint64 ppcMainThreadCycleCounter;
extern uint64 ppcCyclesSince2000; // on init this is set to the cycles that passed since 1.1.2000
extern uint64 ppcCyclesSince2000TimerClock; // on init this is set to the cycles that passed since 1.1.2000 / 20
extern uint64 ppcCyclesSince2000_UTC;
extern uint64 ppcMainThreadDECCycleValue; // value that was set to dec register
extern uint64 ppcMainThreadDECCycleStart; // at which cycle the dec register was set
// PPC timer
void PPCTimer_init(); // starts the asynchronous TSC frequency measurement
void PPCTimer_waitForInit(); // blocks until the TSC frequency is known
uint64 PPCTimer_getFromRDTSC();
bool PPCTimer_hasInvariantRDTSCSupport();
uint64 PPCTimer_microsecondsToTsc(uint64 us);
uint64 PPCTimer_tscToMicroseconds(uint64 us); // note: despite its name, the parameter is a TSC tick count
uint64 PPCTimer_getRawTsc();
void PPCTimer_start();
// core info and control
extern uint32 ppcThreadQuantum; // number of cycles a thread is given per timeslice
extern thread_local PPCInterpreter_t *ppcInterpreterCurrentInstance; // interpreter instance executed by the calling host thread
uint8* PPCInterpreterGetAndModifyStackPointer(sint32 offset);
uint8* PPCInterpreterGetStackPointer();
void PPCInterpreterModifyStackPointer(sint32 offset);
uint32 PPCInterpreter_makeCallableExportDepr(void (*ppcCallableExport)(PPCInterpreter_t* hCPU));
// Returns 'f' unchanged: the value is reinterpreted as a 32-bit integer and converted
// straight back without modifying any bits.
// NOTE(review): despite the name no denormal flushing is performed here - confirm whether
// this is intentional before relying on it.
static inline float flushDenormalToZero(float f)
{
uint32 v = *(uint32*)&f;
return *(float*)&v;
}
// HLE interface
typedef void(*HLECALL)(PPCInterpreter_t* hCPU); // signature of a host function callable from guest code
typedef sint32 HLEIDX; // index identifying a registered HLE call
HLEIDX PPCInterpreter_registerHLECall(HLECALL hleCall);
HLECALL PPCInterpreter_getHLECall(HLEIDX funcIndex);
// HLE scheduler
void PPCCore_deleteAllThreads();
void PPCInterpreter_relinquishTimeslice(); // ends the current timeslice early
void PPCCore_boostQuantum(sint32 numCycles); // adds cycles to the current timeslice
void PPCCore_deboostQuantum(sint32 numCycles); // removes cycles from the current timeslice
void PPCCore_switchToScheduler(); // caller must not hold the scheduler lock
void PPCCore_switchToSchedulerWithLock(); // caller must hold the scheduler lock
PPCInterpreter_t* PPCCore_executeCallbackInternal(uint32 functionMPTR);
void PPCCore_init();
// LLE scheduler
void PPCCoreLLE_startSingleCoreScheduler(uint32 entrypoint);

View file

@ -0,0 +1,194 @@
#include "Cafe/HW/Espresso/Const.h"
#include <immintrin.h>
#include "asm/x64util.h"
#include "config/ActiveSettings.h"
#include "util/helpers/fspinlock.h"
#include "util/highresolutiontimer/HighResolutionTimer.h"
#if BOOST_OS_LINUX > 0
// forward declaration of the 64x64 -> 128 bit multiply helper for Linux builds
static __inline__
unsigned __int64 _umul128(unsigned __int64,
unsigned __int64,
unsigned __int64*);
#endif
uint64 _rdtscLastMeasure = 0; // TSC value of the most recent measurement
uint64 _rdtscFrequency = 0; // estimated TSC frequency; 0 until the estimation has finished
// 128-bit unsigned integer stored as two 64-bit halves
struct uint128_t
{
uint64 low;
uint64 high;
};
static_assert(sizeof(uint128_t) == 16);
uint128_t _rdtscAcc{}; // accumulator used by PPCTimer_getFromRDTSC (holds the division remainder between calls)
#pragma intrinsic(__rdtsc)
// Computes (a * b) / d while avoiding the overflow of the plain 64-bit product.
// With a = qa*d + ra and b = qb*d + rb the result equals
//   qa*b + ra*qb + (ra*rb)/d
// d must not be zero.
uint64 muldiv64(uint64 a, uint64 b, uint64 d)
{
	const uint64 quotA = a / d;
	const uint64 remA = a % d;
	const uint64 quotB = b / d;
	const uint64 remB = b % d;
	return quotA * b + remA * quotB + remA * remB / d;
}
bool PPCTimer_hasInvariantRDTSCSupport()
{
uint32 cpuv[4];
__cpuid((int*)cpuv, 0x80000007);
return ((cpuv[3] >> 8) & 1);
}
// Estimates the TSC frequency by comparing the TSC against the high resolution timer
// over a period of roughly three seconds. Blocks the calling thread for that time.
// A message is logged if the CPU does not report invariant TSC support.
uint64 PPCTimer_estimateRDTSCFrequency()
{
	if (!PPCTimer_hasInvariantRDTSCSupport())
		forceLog_printf("Invariant TSC not supported");
	// sample both clocks at the start
	_mm_mfence();
	unsigned __int64 tscBegin = __rdtsc();
	unsigned int tickCountBegin = GetTickCount();
	HRTick hrtBegin = HighResolutionTimer::now().getTick();
	// wait roughly 3 seconds, sleeping in 10ms steps
	while ((GetTickCount() - tickCountBegin) < 3000)
		std::this_thread::sleep_for(std::chrono::milliseconds(10));
	// sample both clocks at the end
	_mm_mfence();
	HRTick hrtEnd = HighResolutionTimer::now().getTick();
	unsigned __int64 tscFinish = __rdtsc();
	// tscFrequency = tscTicks * hrtFrequency / hrtTicks
	uint64 tscElapsed = tscFinish - tscBegin;
	uint64 hrtFrequency = 0;
	uint64 hrtElapsed = HighResolutionTimer::getTimeDiffEx(hrtBegin, hrtEnd, hrtFrequency);
	return muldiv64(tscElapsed, hrtFrequency, hrtElapsed);
}
int PPCTimer_initThread()
{
_rdtscFrequency = PPCTimer_estimateRDTSCFrequency();
return 0;
}
// Starts the TSC frequency estimation on a detached background thread and records
// the current TSC as the initial measurement. Use PPCTimer_waitForInit() to wait for
// the estimation to finish.
void PPCTimer_init()
{
	std::thread(PPCTimer_initThread).detach();
	_rdtscLastMeasure = __rdtsc();
}
uint64 _tickSummary = 0;
// Restarts the emulated timer: the tick total is cleared and the current TSC becomes
// the new reference point.
void PPCTimer_start()
{
	_tickSummary = 0;
	_rdtscLastMeasure = __rdtsc();
}
// Returns the current value of the host time stamp counter.
uint64 PPCTimer_getRawTsc()
{
	const uint64 tscNow = __rdtsc();
	return tscNow;
}
// Converts a duration in microseconds to TSC ticks using the estimated TSC frequency.
// The duration is split into whole seconds and the sub-second remainder so that the
// intermediate product 'us * frequency' cannot overflow 64 bits for long durations.
// The result is identical to floor(us * frequency / 1000000).
uint64 PPCTimer_microsecondsToTsc(uint64 us)
{
	const uint64 wholeSeconds = us / 1000000ULL;
	const uint64 remainingMicroseconds = us % 1000000ULL;
	return wholeSeconds * _rdtscFrequency + (remainingMicroseconds * _rdtscFrequency) / 1000000ULL;
}
// Converts a number of TSC ticks to microseconds: ticks * 1000000 / TSC frequency.
// Note that the parameter 'us' carries the TSC tick count, not microseconds.
// The multiplication is performed with 128-bit precision.
uint64 PPCTimer_tscToMicroseconds(uint64 us)
{
	// 128-bit product of ticks and 1000000
	uint128_t scaledTicks{};
#if BOOST_OS_WINDOWS > 0
	scaledTicks.low = _umul128(us, 1000000ULL, &scaledTicks.high);
#else
	scaledTicks.low = _umul128(us, 1000000ULL, (unsigned long long*)&scaledTicks.high);
#endif
	// 128-bit by 64-bit division, the remainder is not needed
	uint64 unusedRemainder;
#if _MSC_VER < 1923
	return udiv128(scaledTicks.low, scaledTicks.high, _rdtscFrequency, &unusedRemainder);
#else
	return _udiv128(scaledTicks.high, scaledTicks.low, _rdtscFrequency, &unusedRemainder);
#endif
}
// Returns true once the TSC frequency estimation has produced a (non-zero) result.
bool PPCTimer_isReady()
{
	const bool frequencyKnown = (_rdtscFrequency != 0);
	return frequencyKnown;
}
// Blocks until the timer is ready, polling every 10ms.
void PPCTimer_waitForInit()
{
	for (;;)
	{
		if (PPCTimer_isReady())
			return;
		std::this_thread::sleep_for(std::chrono::milliseconds(10));
	}
}
FSpinlock sTimerSpinlock;
// Returns the total number of emulated core clock ticks since the timer was started.
// thread safe: all shared state is accessed while sTimerSpinlock is held, including the
// value that is returned (it is copied before the lock is released).
//
// The TSC ticks elapsed since the previous call are converted to core clock ticks
// (elapsedTsc * CORE_CLOCK / tscFrequency) with 128-bit precision. The remainder of the
// division is carried over to the next call so that no fractional ticks get lost.
// PPCTimer_waitForInit() must have completed, otherwise the TSC frequency is still zero.
uint64 PPCTimer_getFromRDTSC()
{
	sTimerSpinlock.acquire();
	_mm_mfence();
	uint64 rdtscCurrentMeasure = __rdtsc();
	uint64 rdtscDif = rdtscCurrentMeasure - _rdtscLastMeasure;
	// optimized max(rdtscDif, 0) without conditionals
	rdtscDif = rdtscDif & ~(uint64)((sint64)rdtscDif >> 63);
	// scale the elapsed TSC ticks by the emulated core clock (128-bit product)
	uint128_t diff{};
#if BOOST_OS_WINDOWS > 0
	diff.low = _umul128(rdtscDif, Espresso::CORE_CLOCK, &diff.high);
#else
	diff.low = _umul128(rdtscDif, Espresso::CORE_CLOCK, (unsigned long long*)&diff.high);
#endif
	if (rdtscCurrentMeasure > _rdtscLastMeasure)
		_rdtscLastMeasure = rdtscCurrentMeasure; // only travel forward in time
	// add the product to the accumulator (128-bit addition with carry)
	uint8 c = 0;
#if BOOST_OS_WINDOWS > 0
	c = _addcarry_u64(c, _rdtscAcc.low, diff.low, &_rdtscAcc.low);
	_addcarry_u64(c, _rdtscAcc.high, diff.high, &_rdtscAcc.high);
#else
	// requires casting because of long / long long nonsense
	c = _addcarry_u64(c, _rdtscAcc.low, diff.low, (unsigned long long*)&_rdtscAcc.low);
	_addcarry_u64(c, _rdtscAcc.high, diff.high, (unsigned long long*)&_rdtscAcc.high);
#endif
	// divide by the TSC frequency, the remainder stays in the accumulator
	uint64 remainder;
#if _MSC_VER < 1923
	uint64 elapsedTick = udiv128(_rdtscAcc.low, _rdtscAcc.high, _rdtscFrequency, &remainder);
#else
	uint64 elapsedTick = _udiv128(_rdtscAcc.high, _rdtscAcc.low, _rdtscFrequency, &remainder);
#endif
	_rdtscAcc.low = remainder;
	_rdtscAcc.high = 0;
	// timer scaling
	elapsedTick <<= 3ull; // *8
	uint8 timerShiftFactor = ActiveSettings::GetTimerShiftFactor();
	elapsedTick >>= timerShiftFactor;
	_tickSummary += elapsedTick;
	// take the snapshot while the lock is still held, another thread may update
	// _tickSummary as soon as the lock is released
	const uint64 currentTickTotal = _tickSummary;
	sTimerSpinlock.release();
	return currentTickTotal;
}

View file

@ -0,0 +1,293 @@
#pragma once
#include "Cafe/HW/Espresso/EspressoISA.h"
#include "Cafe/HW/MMU/MMU.h"
bool GamePatch_IsNonReturnFunction(uint32 hleIndex);
// utility class to determine shape of a function
// Starting from an entry address the instruction stream is followed linearly and every
// branch destination that is encountered is queued and processed the same way. The result
// is a set of non-overlapping address ranges that are sorted by start address.
// The tracker owns the range objects it allocates and frees them in its destructor;
// copying is disabled because a copy would share the owned pointers.
class PPCFunctionBoundaryTracker
{
public:
	// a contiguous range of guest code
	struct PPCRange_t
	{
		PPCRange_t() {};
		PPCRange_t(uint32 _startAddress) : startAddress(_startAddress) {};

		uint32 startAddress{};
		uint32 length{}; // in bytes
		//bool isProcessed{false};

		// address of the first byte after the range
		uint32 getEndAddress() const { return startAddress + length; };
	};

public:
	PPCFunctionBoundaryTracker() = default;
	PPCFunctionBoundaryTracker(const PPCFunctionBoundaryTracker&) = delete;
	PPCFunctionBoundaryTracker& operator=(const PPCFunctionBoundaryTracker&) = delete;

	// releases all ranges that are still tracked
	~PPCFunctionBoundaryTracker()
	{
		for (PPCRange_t* range : map_ranges)
			delete range;
	}

	// analyzes the code reachable from startAddress and adds it to the tracked ranges
	void trackStartPoint(MPTR startAddress)
	{
		processRange(startAddress, nullptr, nullptr);
		processBranchTargets();
	}

	// looks up the tracked range that contains 'address'
	// returns true and copies the range into 'range' if one is found, false otherwise
	bool getRangeForAddress(uint32 address, PPCRange_t& range)
	{
		for (auto itr : map_ranges)
		{
			if (address >= itr->startAddress && address < (itr->startAddress + itr->length))
			{
				range = *itr;
				return true;
			}
		}
		return false;
	}

private:
	// remembers 'address' as a branch destination that still has to be visited
	void addBranchDestination(PPCRange_t* sourceRange, MPTR address)
	{
		map_branchTargets.emplace(address);
	}

	// process flow of instruction
	// returns false if the IP cannot increment past the current instruction
	bool processInstruction(PPCRange_t* range, MPTR address)
	{
		// parse instructions
		uint32 opcode = memory_readU32(address);
		switch (Espresso::GetPrimaryOpcode(opcode))
		{
		case Espresso::PrimaryOpcode::ZERO:
		{
			if (opcode == 0)
				return false; // invalid instruction
			break;
		}
		case Espresso::PrimaryOpcode::VIRTUAL_HLE:
		{
			// end of function
			// is there a jump to an instruction after this one?
			uint32 hleFuncId = opcode & 0xFFFF;
			if (hleFuncId >= 0x1000 && hleFuncId < 0x4000)
			{
				if (GamePatch_IsNonReturnFunction(hleFuncId - 0x1000) == false)
				{
					return true;
				}
			}
			return false;
		}
		case Espresso::PrimaryOpcode::BC:
		{
			uint32 BD, BI;
			Espresso::BOField BO;
			bool AA, LK;
			Espresso::decodeOp_BC(opcode, BD, BO, BI, AA, LK);
			uint32 branchTarget = AA ? BD : BD + address;
			// branches with the link bit set are calls, their targets are not tracked
			if (!LK)
				addBranchDestination(range, branchTarget);
			break;
		}
		case Espresso::PrimaryOpcode::B:
		{
			uint32 LI;
			bool AA, LK;
			Espresso::decodeOp_B(opcode, LI, AA, LK);
			uint32 branchTarget = AA ? LI : LI + address;
			if (!LK)
			{
				addBranchDestination(range, branchTarget);
				// if the next two or previous two instructions are branch instructions, we assume that they are destinations of a jump table
				// todo - can we make this more reliable by checking for BCTR or similar instructions first?
				// example: The Swapper 0x01B1FC04
				if ((PPCRecompilerCalcFuncSize_isUnconditionalBranchInstruction(memory_readU32(address + 4)) && PPCRecompilerCalcFuncSize_isUnconditionalBranchInstruction(memory_readU32(address + 8))) ||
					(PPCRecompilerCalcFuncSize_isUnconditionalBranchInstruction(memory_readU32(address - 8)) && PPCRecompilerCalcFuncSize_isUnconditionalBranchInstruction(memory_readU32(address - 4))))
				{
					return true;
				}
				return false; // current flow ends at unconditional branch instruction
			}
			break;
		}
		case Espresso::PrimaryOpcode::GROUP_19:
			switch (Espresso::GetGroup19Opcode(opcode))
			{
			case Espresso::Opcode19::BCLR:
			{
				Espresso::BOField BO;
				uint32 BI;
				bool LK;
				Espresso::decodeOp_BCLR(opcode, BO, BI, LK);
				if (BO.branchAlways() && !LK)
				{
					// unconditional BLR
					return false;
				}
				break;
			}
			case Espresso::Opcode19::BCCTR:
				if (opcode == 0x4E800420)
				{
					// unconditional BCTR
					// this instruction is often used for switch statements, therefore we should be wary of ending the function here
					// It's better to overestimate function size than to predict sizes that are too short
					// Currently we only end the function if the BCTR is followed by a NOP (alignment) or invalid instruction
					// todo: improve robustness, find better ways to handle false positives
					uint32 nextOpcode = memory_readU32(address + 4);
					if (nextOpcode == 0x60000000 || PPCRecompilerCalcFuncSize_isValidInstruction(nextOpcode) == false)
					{
						return false;
					}
					return true;
				}
				// conditional BCTR
				return true;
			default:
				break;
			}
			break;
		default:
			break;
		}
		return true;
	}

	// debug-only sanity check: ranges must be sorted and must not overlap
	void checkForCollisions()
	{
#ifndef PUBLIC_RELEASE
		uint32 endOfPrevious = 0;
		for (auto itr : map_ranges)
		{
			if (endOfPrevious > itr->startAddress)
			{
				cemu_assert_debug(false);
			}
			endOfPrevious = itr->startAddress + itr->length;
		}
#endif
	}

	// follows the instruction flow beginning at startAddress
	// previousRange must either be NULL or a range that ends exactly at startAddress, in which case it is extended
	// nextRange must point to the closest range after startAddress, or NULL if there is none
	void processRange(MPTR startAddress, PPCRange_t* previousRange, PPCRange_t* nextRange)
	{
		checkForCollisions();
		cemu_assert_debug(previousRange == nullptr || (startAddress == (previousRange->startAddress + previousRange->length)));
		PPCRange_t* newRange;
		if (previousRange && (previousRange->startAddress + previousRange->length) == startAddress)
		{
			newRange = previousRange;
		}
		else
		{
			cemu_assert_debug(previousRange == nullptr);
			newRange = new PPCRange_t(startAddress);
			map_ranges.emplace(newRange);
		}
		// process instruction flow until it is interrupted by a non-conditional branch
		MPTR currentAddress = startAddress;
		MPTR endAddress = 0xFFFFFFFF;
		if (nextRange)
			endAddress = nextRange->startAddress;
		while (currentAddress < endAddress)
		{
			if (!processInstruction(newRange, currentAddress))
			{
				// the terminating instruction still belongs to the range
				currentAddress += 4;
				break;
			}
			currentAddress += 4;
		}
		newRange->length = currentAddress - newRange->startAddress;

		if (nextRange && currentAddress >= nextRange->startAddress)
		{
			// merge with next range
			newRange->length = (nextRange->startAddress + nextRange->length) - newRange->startAddress;
			map_ranges.erase(nextRange);
			delete nextRange;
			checkForCollisions();
			return;
		}
		checkForCollisions();
	}

	// find first unvisited branch target and start a new range there
	// return true if method should be called again
	bool processBranchTargetsSinglePass()
	{
		cemu_assert_debug(!map_ranges.empty());
		auto rangeItr = map_ranges.begin();

		PPCRange_t* previousRange = nullptr;
		for (std::set<uint32_t>::const_iterator targetItr = map_branchTargets.begin() ; targetItr != map_branchTargets.end(); )
		{
			// skip all ranges that end at or before the target
			while (rangeItr != map_ranges.end() && ((*rangeItr)->startAddress + (*rangeItr)->length) <= (*targetItr))
			{
				previousRange = *rangeItr;
				rangeItr++;
				if (rangeItr == map_ranges.end())
				{
					// last range reached
					if ((previousRange->startAddress + previousRange->length) == *targetItr)
						processRange(*targetItr, previousRange, nullptr);
					else
						processRange(*targetItr, nullptr, nullptr);
					return true;
				}
			}
			if ((*targetItr) >= (*rangeItr)->startAddress &&
				(*targetItr) < ((*rangeItr)->startAddress + (*rangeItr)->length))
			{
				// delete visited targets
				targetItr = map_branchTargets.erase(targetItr);
				continue;
			}

			cemu_assert_debug((*rangeItr)->startAddress > (*targetItr));
			if (previousRange && (previousRange->startAddress + previousRange->length) == *targetItr)
				processRange(*targetItr, previousRange, *rangeItr); // extend previousRange
			else
				processRange(*targetItr, nullptr, *rangeItr);
			return true;
		}
		return false;
	}

	// visits branch targets until none are left unvisited
	void processBranchTargets()
	{
		while (processBranchTargetsSinglePass());
	}

private:
	// returns true for a 'b' instruction without link bit
	bool PPCRecompilerCalcFuncSize_isUnconditionalBranchInstruction(uint32 opcode)
	{
		if (Espresso::GetPrimaryOpcode(opcode) == Espresso::PrimaryOpcode::B)
		{
			uint32 LI;
			bool AA, LK;
			Espresso::decodeOp_B(opcode, LI, AA, LK);
			if (!LK)
				return true;
		}
		return false;
	}

	// an opcode is only considered invalid if its primary opcode (upper 6 bits) is zero
	bool PPCRecompilerCalcFuncSize_isValidInstruction(uint32 opcode)
	{
		if ((opcode >> 26) == 0)
			return false;
		return true;
	}

private:
	// orders ranges by start address
	struct RangePtrCmp
	{
		bool operator()(const PPCRange_t* lhs, const PPCRange_t* rhs) const
		{
			return lhs->startAddress < rhs->startAddress;
		}
	};

	std::set<PPCRange_t*, RangePtrCmp> map_ranges; // owned ranges, sorted by start address
	std::set<uint32> map_branchTargets; // branch destinations that still have to be visited
};

View file

@ -0,0 +1,593 @@
#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
#include "PPCFunctionBoundaryTracker.h"
#include "PPCRecompiler.h"
#include "PPCRecompilerIml.h"
#include "PPCRecompilerX64.h"
#include "Cafe/OS/RPL/rpl.h"
#include "util/containers/RangeStore.h"
#include "Cafe/OS/libs/coreinit/coreinit_CodeGen.h"
#include "config/ActiveSettings.h"
#include "config/LaunchSettings.h"
#include "util/helpers/fspinlock.h"
#include "Common/ExceptionHandler/ExceptionHandler.h"
#include "util/helpers/helpers.h"
#include "util/MemMapper/MemMapper.h"
// Describes a range of guest memory (start address and size) queued for invalidation.
struct PPCInvalidationRange
{
	MPTR startAddress;
	uint32 size;

	PPCInvalidationRange(MPTR _startAddress, uint32 _size)
		: startAddress(_startAddress), size(_size)
	{
	}
};
// Shared state of the recompiler
struct
{
FSpinlock recompilerSpinlock; // held while targetQueue is modified (see PPCRecompiler_visitAddressNoBlock)
std::queue<MPTR> targetQueue; // addresses waiting to be recompiled
std::vector<PPCInvalidationRange> invalidationRanges;
}PPCRecompilerState;
RangeStore<PPCRecFunction_t*, uint32, 7703, 0x2000> rangeStore_ppcRanges;
// entry point used to run recompiled code: takes the code address and the interpreter instance
void ATTR_MS_ABI (*PPCRecompiler_enterRecompilerCode)(uint64 codeMem, uint64 ppcInterpreterInstance);
// The two pointers below double as markers inside the direct jump table:
// '_unvisited' marks addresses that have not been queued for recompilation yet,
// '_visited' marks addresses that are queued but have no recompiled code yet.
void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_visited)();
void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_unvisited)();
PPCRecompilerInstanceData_t* ppcRecompilerInstanceData;
bool ppcRecompilerEnabled = false; // all entry points return early while this is false
// Queues enterAddress for recompilation if it has not been visited before.
// This function never blocks: if the recompiler lock cannot be acquired immediately the
// request is silently dropped.
void PPCRecompiler_visitAddressNoBlock(uint32 enterAddress)
{
	auto& jumpTableEntry = ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4];
	// cheap check without holding the lock
	if (jumpTableEntry != PPCRecompiler_leaveRecompilerCode_unvisited)
		return;
	if (!PPCRecompilerState.recompilerSpinlock.tryAcquire())
		return;
	// check again under the lock, the entry may have been visited in the meantime
	if (jumpTableEntry == PPCRecompiler_leaveRecompilerCode_unvisited)
	{
		// add to recompilation queue and flag as visited
		PPCRecompilerState.targetQueue.emplace(enterAddress);
		jumpTableEntry = PPCRecompiler_leaveRecompilerCode_visited;
	}
	PPCRecompilerState.recompilerSpinlock.release();
}
// Queues enterAddress for recompilation (non-blocking) if the recompiler is enabled.
void PPCRecompiler_recompileIfUnvisited(uint32 enterAddress)
{
	if (ppcRecompilerEnabled)
		PPCRecompiler_visitAddressNoBlock(enterAddress);
}
// Executes recompiled code starting at funcPtr for the given interpreter instance.
// On Windows the floating point rounding mode is switched to round-to-nearest for the
// duration of the call and restored afterwards.
// If the recompiled code is left while cycles remain, the address at which execution
// stopped is queued for recompilation.
void PPCRecompiler_enter(PPCInterpreter_t* hCPU, PPCREC_JUMP_ENTRY funcPtr)
{
#if BOOST_OS_WINDOWS > 0
	const uint32 savedFpControl = _controlfp(0, 0);
	_controlfp(_RC_NEAR, _MCW_RC);
	PPCRecompiler_enterRecompilerCode((uint64)funcPtr, (uint64)hCPU);
	_controlfp(savedFpControl, _MCW_RC);
	// debug recompiler exit - useful to find frequently executed functions which couldn't be recompiled
#ifndef PUBLIC_RELEASE
	if (hCPU->remainingCycles > 0 && GetAsyncKeyState(VK_F4))
	{
		auto now = std::chrono::high_resolution_clock::now();
		auto micros = std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count();
		forceLog_printf("Recompiler exit: 0x%08x LR: 0x%08x Timestamp %lld.%04lld", hCPU->instructionPointer, hCPU->spr.LR, micros / 1000LL, (micros % 1000LL));
	}
#endif
#else
	PPCRecompiler_enterRecompilerCode((uint64)funcPtr, (uint64)hCPU);
#endif
	// nothing more to do if the timeslice was used up
	if (hCPU->remainingCycles <= 0)
		return;
	// the recompiler was left prematurely, attempt to recompile the code at the new location
	PPCRecompiler_visitAddressNoBlock(hCPU->instructionPointer);
}
// Enters recompiled code at enterAddress if native code already exists for it.
// Unlike PPCRecompiler_attemptEnter this never queues the address for recompilation.
// hCPU->instructionPointer is expected to equal enterAddress.
void PPCRecompiler_attemptEnterWithoutRecompile(PPCInterpreter_t* hCPU, uint32 enterAddress)
{
	cemu_assert_debug(hCPU->instructionPointer == enterAddress);
	if (ppcRecompilerEnabled == false)
		return;
	// verify the instance data before the jump table is dereferenced (the check used to come after the access)
	cemu_assert_debug(ppcRecompilerInstanceData != nullptr);
	auto funcPtr = ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4];
	// both leave stubs are sentinels meaning "no native code available"
	if (funcPtr != PPCRecompiler_leaveRecompilerCode_unvisited && funcPtr != PPCRecompiler_leaveRecompilerCode_visited)
	{
		PPCRecompiler_enter(hCPU, funcPtr);
	}
}
// Enters recompiled code at enterAddress if available, otherwise queues the address for recompilation.
// Does nothing while the recompiler is disabled or when hCPU has no cycles left.
// hCPU->instructionPointer is expected to equal enterAddress.
void PPCRecompiler_attemptEnter(PPCInterpreter_t* hCPU, uint32 enterAddress)
{
	cemu_assert_debug(hCPU->instructionPointer == enterAddress);
	if (ppcRecompilerEnabled == false)
		return;
	if (hCPU->remainingCycles <= 0)
		return;
	// verify the instance data before the jump table is dereferenced (the check used to come after the access)
	cemu_assert_debug(ppcRecompilerInstanceData != nullptr);
	auto funcPtr = ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4];
	if (funcPtr == PPCRecompiler_leaveRecompilerCode_unvisited)
	{
		// first visit: queue for recompilation, keep interpreting for now
		PPCRecompiler_visitAddressNoBlock(enterAddress);
	}
	else if (funcPtr != PPCRecompiler_leaveRecompilerCode_visited)
	{
		// native code is available
		PPCRecompiler_enter(hCPU, funcPtr);
	}
}
// Translates the PPC code in 'range' to x64.
//   range          - PPC address range to recompile
//   entryAddresses - PPC addresses at which the function must be enterable
//   entryPointsOut - on success replaced with (PPC address, x64 code offset) pairs of all enterable segments;
//                    left untouched on failure
// Returns a newly allocated PPCRecFunction_t (caller takes ownership) or nullptr if the range is rejected
// or any translation stage fails.
PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set<uint32>& entryAddresses, std::vector<std::pair<MPTR, uint32>>& entryPointsOut)
{
	if (range.startAddress >= PPC_REC_CODE_AREA_END)
	{
		cemuLog_force("Attempting to recompile function outside of allowed code area");
		return nullptr;
	}
	// functions starting inside the codegen area are skipped when coreinit asks us to avoid it
	uint32 codeGenRangeStart = 0;
	uint32 codeGenRangeSize = 0;
	coreinit::OSGetCodegenVirtAddrRangeInternal(codeGenRangeStart, codeGenRangeSize);
	if (codeGenRangeSize != 0)
	{
		if (range.startAddress >= codeGenRangeStart && range.startAddress < (codeGenRangeStart + codeGenRangeSize))
		{
			if (coreinit::codeGenShouldAvoid())
			{
				return nullptr;
			}
		}
	}
	PPCRecFunction_t* ppcRecFunc = new PPCRecFunction_t();
	ppcRecFunc->ppcAddress = range.startAddress;
	ppcRecFunc->ppcSize = range.length;
	// generate intermediate code
	ppcImlGenContext_t ppcImlGenContext = { 0 };
	bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses);
	if (compiledSuccessfully == false)
	{
		// todo: Free everything
		PPCRecompiler_freeContext(&ppcImlGenContext);
		delete ppcRecFunc;
		return nullptr;
	}
	// emit x64 code
	bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext);
	if (x64GenerationSuccess == false)
	{
		PPCRecompiler_freeContext(&ppcImlGenContext);
		// release the function object as well, it was leaked on this path before
		delete ppcRecFunc;
		return nullptr;
	}
	// collect list of PPC-->x64 entry points
	entryPointsOut.clear();
	for (sint32 s = 0; s < ppcImlGenContext.segmentListCount; s++)
	{
		PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s];
		if (imlSegment->isEnterable == false)
			continue;
		uint32 ppcEnterOffset = imlSegment->enterPPCAddress;
		uint32 x64Offset = imlSegment->x64Offset;
		entryPointsOut.emplace_back(ppcEnterOffset, x64Offset);
	}
	PPCRecompiler_freeContext(&ppcImlGenContext);
	return ppcRecFunc;
}
// Publishes a freshly recompiled function: installs its entry points in the direct jump table and registers
// its PPC ranges in rangeStore_ppcRanges. All work happens with recompilerSpinlock held.
//   initialEntryPoint - PPC address that triggered the recompilation
//   range             - PPC range that was recompiled
//   ppcRecFunc        - the recompiled function
//   entryPoints       - (PPC address, x64 code offset) pairs as produced by PPCRecompiler_recompileFunction
// Returns false without installing anything if the initial entry point is no longer flagged as visited
// or if the function overlaps a range that was invalidated in the meantime.
bool PPCRecompiler_makeRecompiledFunctionActive(uint32 initialEntryPoint, PPCFunctionBoundaryTracker::PPCRange_t& range, PPCRecFunction_t* ppcRecFunc, std::vector<std::pair<MPTR, uint32>>& entryPoints)
{
// update jump table
PPCRecompilerState.recompilerSpinlock.acquire();
// check if the initial entrypoint is still flagged for recompilation
// it is possible that the range has been invalidated during the time it took to translate the function
if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[initialEntryPoint / 4] != PPCRecompiler_leaveRecompilerCode_visited)
{
PPCRecompilerState.recompilerSpinlock.release();
return false;
}
// check if the current range got invalidated in the time it took to recompile it
bool isInvalidated = false;
for (auto& invRange : PPCRecompilerState.invalidationRanges)
{
MPTR rStartAddr = invRange.startAddress;
MPTR rEndAddr = rStartAddr + invRange.size;
for (auto& recFuncRange : ppcRecFunc->list_ranges)
{
// NOTE(review): the ">=" also matches a function range that ends exactly at rStartAddr - confirm whether this conservative match is intended
if (recFuncRange.ppcAddress < (rEndAddr) && (recFuncRange.ppcAddress + recFuncRange.ppcSize) >= rStartAddr)
{
isInvalidated = true;
break; // leaves the inner loop only, the remaining invalidation ranges are still iterated
}
}
}
// the pending invalidations are dropped regardless of the outcome of the check
PPCRecompilerState.invalidationRanges.clear();
if (isInvalidated)
{
PPCRecompilerState.recompilerSpinlock.release();
return false;
}
// update jump table
for (auto& itr : entryPoints)
{
// itr.first = PPC address, itr.second = offset into the generated x64 code
ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[itr.first / 4] = (PPCREC_JUMP_ENTRY)((uint8*)ppcRecFunc->x86Code + itr.second);
}
// due to inlining, some entrypoints can get optimized away
// therefore we reset all addresses that are still marked as visited (but not recompiled)
// we don't remove the points from the queue but any address that's not marked as visited won't get recompiled
// if they are reachable, the interpreter will queue them again
// NOTE(review): the "<=" includes the address at startAddress + length - confirm whether range.length is meant to be inclusive
for (uint32 v = range.startAddress; v <= (range.startAddress + range.length); v += 4)
{
auto funcPtr = ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[v / 4];
if (funcPtr == PPCRecompiler_leaveRecompilerCode_visited)
ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[v / 4] = PPCRecompiler_leaveRecompilerCode_unvisited;
}
// register ranges
for (auto& r : ppcRecFunc->list_ranges)
{
// the returned handle is kept so that PPCRecompiler_deleteFunction can remove the range again
r.storedRange = rangeStore_ppcRanges.storeRange(ppcRecFunc, r.ppcAddress, r.ppcAddress + r.ppcSize);
}
PPCRecompilerState.recompilerSpinlock.release();
return true;
}
// Recompiles the function containing 'address' and makes it active.
// The address is expected to be flagged as visited in the direct jump table.
// Silently gives up if the function boundaries cannot be determined or if recompilation fails.
void PPCRecompiler_recompileAtAddress(uint32 address)
{
	cemu_assert_debug(ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[address / 4] == PPCRecompiler_leaveRecompilerCode_visited);
	// get size
	PPCFunctionBoundaryTracker funcBoundaries;
	funcBoundaries.trackStartPoint(address);
	// get range that encompasses address
	PPCFunctionBoundaryTracker::PPCRange_t range;
	if (funcBoundaries.getRangeForAddress(address, range) == false)
	{
		cemu_assert_debug(false);
		// without a valid range there is nothing to recompile; continuing would operate on an uninitialized range
		return;
	}
	// todo - use info from previously compiled ranges to determine full size of this function (and merge all the entryAddresses)
	// collect all currently known entry points for this range
	PPCRecompilerState.recompilerSpinlock.acquire();
	std::set<uint32> entryAddresses;
	entryAddresses.emplace(address);
	PPCRecompilerState.recompilerSpinlock.release();
	std::vector<std::pair<MPTR, uint32>> functionEntryPoints;
	auto func = PPCRecompiler_recompileFunction(range, entryAddresses, functionEntryPoints);
	if (!func)
	{
		return; // recompilation failed
	}
	// activation can be rejected (e.g. the range got invalidated meanwhile); in that case the address will be
	// queued again by the interpreter if it is still reachable
	// NOTE(review): func is not released when activation is rejected - confirm ownership before freeing it here
	PPCRecompiler_makeRecompiledFunctionActive(address, range, func, functionEntryPoints);
}
// Body of the background recompiler thread. Never returns.
// Every 10ms the queue of visited addresses is drained:
//   1) pop an address from the queue
//   2) skip it unless it is still flagged as visited
//   3) otherwise recompile the function at that address (which also updates the jump table)
void PPCRecompiler_thread()
{
	SetThreadName("PPCRecompiler_thread");
	for (;;)
	{
		std::this_thread::sleep_for(std::chrono::milliseconds(10));
		for (;;)
		{
			PPCRecompilerState.recompilerSpinlock.acquire();
			if (PPCRecompilerState.targetQueue.empty())
			{
				PPCRecompilerState.recompilerSpinlock.release();
				break;
			}
			const auto queuedAddress = PPCRecompilerState.targetQueue.front();
			PPCRecompilerState.targetQueue.pop();
			// only addresses which are still marked as visited get recompiled
			const bool isStillVisited = ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[queuedAddress / 4] == PPCRecompiler_leaveRecompilerCode_visited;
			PPCRecompilerState.recompilerSpinlock.release();
			// the recompilation itself runs without the lock
			if (isStillVisited)
				PPCRecompiler_recompileAtAddress(queuedAddress);
		}
	}
}
// granularity (in bytes of PPC address space) in which the lookup tables are allocated
#define PPC_REC_ALLOC_BLOCK_SIZE (4*1024*1024) // 4MB
// one bit per block of PPC address space, set once PPCRecompiler_reserveLookupTableBlock has handled that block
std::bitset<(MEMORY_CODEAREA_ADDR + MEMORY_CODEAREA_SIZE) / PPC_REC_ALLOC_BLOCK_SIZE> ppcRecompiler_reservedBlockMask;
// Allocates the lookup table memory for the 4MB block of PPC address space that contains 'offset'
// and initializes every direct jump table entry of that block to "unvisited".
// Blocks that were already set up are skipped.
void PPCRecompiler_reserveLookupTableBlock(uint32 offset)
{
	uint32 blockIndex = offset / PPC_REC_ALLOC_BLOCK_SIZE;
	offset = blockIndex * PPC_REC_ALLOC_BLOCK_SIZE; // round down to the start of the block
	if (ppcRecompiler_reservedBlockMask[blockIndex])
		return;
	// each PPC instruction (4 bytes) has one pointer-sized entry in both tables
	void* p1 = MemMapper::AllocateMemory(&(ppcRecompilerInstanceData->ppcRecompilerFuncTable[offset/4]), (PPC_REC_ALLOC_BLOCK_SIZE/4)*sizeof(void*), MemMapper::PAGE_PERMISSION::P_RW, true);
	void* p3 = MemMapper::AllocateMemory(&(ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[offset/4]), (PPC_REC_ALLOC_BLOCK_SIZE/4)*sizeof(void*), MemMapper::PAGE_PERMISSION::P_RW, true);
	if( !p1 || !p3 )
	{
		forceLog_printf("Failed to allocate memory for recompiler (0x%08x)", offset);
		cemu_assert(false);
		return;
	}
	// flag the block only after the allocation succeeded, otherwise a failed block would be treated as usable later on
	ppcRecompiler_reservedBlockMask[blockIndex] = true;
	for(uint32 i=0; i<PPC_REC_ALLOC_BLOCK_SIZE/4; i++)
	{
		ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[offset/4+i] = PPCRecompiler_leaveRecompilerCode_unvisited;
	}
}
// Makes sure lookup tables exist for every 4MB block touched by [startAddress, startAddress + size).
// The range is clamped to the end of the code area. Does nothing before PPCRecompiler_init has set up the instance data.
void PPCRecompiler_allocateRange(uint32 startAddress, uint32 size)
{
	if (ppcRecompilerInstanceData == nullptr)
		return;
	const uint32 codeAreaEnd = (uint32)MEMORY_CODEAREA_ADDR + MEMORY_CODEAREA_SIZE;
	// widen the range to block boundaries: end is rounded up, start is rounded down
	uint32 blockEnd = (startAddress + size + PPC_REC_ALLOC_BLOCK_SIZE - 1) & ~(PPC_REC_ALLOC_BLOCK_SIZE-1);
	uint32 blockStart = startAddress & ~(PPC_REC_ALLOC_BLOCK_SIZE-1);
	blockStart = std::min(blockStart, codeAreaEnd);
	blockEnd = std::min(blockEnd, codeAreaEnd);
	uint32 currentBlock = blockStart;
	while (currentBlock < blockEnd)
	{
		PPCRecompiler_reserveLookupTableBlock(currentBlock);
		currentBlock += PPC_REC_ALLOC_BLOCK_SIZE;
	}
}
// Result entry of PPCRecompiler_findFuncRanges: one registered PPC range plus the x86 code of the function owning it
struct ppcRecompilerFuncRange_t
{
MPTR ppcStart; // start address of the PPC range
uint32 ppcSize; // size of the PPC range (end - start)
void* x86Start; // copied from PPCRecFunction_t::x86Code
size_t x86Size; // copied from PPCRecFunction_t::x86Size
};
// Looks up all registered function ranges which overlap the instruction at 'addr'.
//   rangesOut  - array receiving the results
//   countInOut - in: capacity of rangesOut, out: total number of matching ranges (can exceed the capacity)
// Returns true if all matches fit into rangesOut, false if results had to be dropped.
DLLEXPORT bool PPCRecompiler_findFuncRanges(uint32 addr, ppcRecompilerFuncRange_t* rangesOut, size_t* countInOut)
{
	PPCRecompilerState.recompilerSpinlock.acquire();
	const size_t capacity = *countInOut;
	size_t numFound = 0;
	rangeStore_ppcRanges.findRanges(addr, addr + 4, [rangesOut, capacity, &numFound](uint32 rangeStart, uint32 rangeEnd, PPCRecFunction_t* recFunc)
		{
			// matches beyond the capacity are counted but not stored
			if (numFound < capacity)
			{
				ppcRecompilerFuncRange_t& entry = rangesOut[numFound];
				entry.ppcStart = rangeStart;
				entry.ppcSize = (rangeEnd - rangeStart);
				entry.x86Start = recFunc->x86Code;
				entry.x86Size = recFunc->x86Size;
			}
			numFound++;
		}
	);
	PPCRecompilerState.recompilerSpinlock.release();
	*countInOut = numFound;
	return numFound <= capacity;
}
// Returns the base of the direct jump table, or nullptr while the recompiler instance data does not exist.
DLLEXPORT uintptr_t* PPCRecompiler_getJumpTableBase()
{
	return ppcRecompilerInstanceData != nullptr
		? (uintptr_t*)ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable
		: nullptr;
}
// Resets the lookup table entries of 'size' bytes of PPC code starting at 'offset':
// the function table entry is cleared and the jump table entry is set back to "unvisited".
void PPCRecompiler_invalidateTableRange(uint32 offset, uint32 size)
{
	if (!ppcRecompilerInstanceData)
		return;
	// one table entry per 4-byte PPC instruction
	const uint32 firstEntry = offset / 4;
	const uint32 entryCount = size / 4;
	for (uint32 n = 0; n != entryCount; ++n)
	{
		const uint32 entryIndex = firstEntry + n;
		ppcRecompilerInstanceData->ppcRecompilerFuncTable[entryIndex] = nullptr;
		ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[entryIndex] = PPCRecompiler_leaveRecompilerCode_unvisited;
	}
}
// Deactivates a recompiled function: resets the lookup tables for all of its ranges and
// removes the ranges from rangeStore_ppcRanges. The generated x86 code is not released yet.
// The caller must already hold PPCRecompilerState.recompilerSpinlock.
void PPCRecompiler_deleteFunction(PPCRecFunction_t* func)
{
	cemu_assert_debug(PPCRecompilerState.recompilerSpinlock.isHolding());
	for (auto& funcRange : func->list_ranges)
	{
		PPCRecompiler_invalidateTableRange(funcRange.ppcAddress, funcRange.ppcSize);
		if (funcRange.storedRange)
		{
			rangeStore_ppcRanges.deleteRange(funcRange.storedRange);
			funcRange.storedRange = nullptr;
		}
	}
	// todo - free x86 code
}
// Invalidates all recompiler state for the PPC range [startAddr, endAddr):
//  - jump table entries in the range are reset to "unvisited"
//  - the range is recorded so that recompilations currently in flight can detect the invalidation
//  - every registered function overlapping the range is deactivated
// No-op while the recompiler is disabled or if startAddr lies outside of the code area.
DLLEXPORT void PPCRecompiler_invalidateRange(uint32 startAddr, uint32 endAddr)
{
	if (!ppcRecompilerEnabled)
		return;
	if (startAddr >= PPC_REC_CODE_AREA_SIZE)
		return;
	cemu_assert_debug(endAddr >= startAddr);
	PPCRecompilerState.recompilerSpinlock.acquire();
	// mark range as unvisited (both bounds are rounded down to a multiple of 4)
	const uint64 alignedEnd = (uint64)(endAddr & ~3);
	uint64 addr = (uint64)startAddr & ~3;
	while (addr < alignedEnd)
	{
		ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[addr / 4] = PPCRecompiler_leaveRecompilerCode_unvisited;
		addr += 4;
	}
	// add entry to invalidation queue
	PPCRecompilerState.invalidationRanges.emplace_back(startAddr, endAddr - startAddr);
	// deactivate overlapping functions one by one until none is left
	uint32 foundStart;
	uint32 foundEnd;
	PPCRecFunction_t* foundFunc;
	while (rangeStore_ppcRanges.findFirstRange(startAddr, endAddr, foundStart, foundEnd, foundFunc))
		PPCRecompiler_deleteFunction(foundFunc);
	PPCRecompilerState.recompilerSpinlock.release();
}
void PPCRecompiler_init()
{
if (ActiveSettings::GetCPUMode() == CPUMode::SinglecoreInterpreter)
{
ppcRecompilerEnabled = false;
return;
}
if (LaunchSettings::ForceInterpreter())
{
cemuLog_log(LogType::Force, "Recompiler disabled. Command line --force-interpreter was passed");
return;
}
if (ppcRecompilerInstanceData)
{
MemMapper::FreeReservation(ppcRecompilerInstanceData, sizeof(PPCRecompilerInstanceData_t));
ppcRecompilerInstanceData = nullptr;
}
debug_printf("Allocating %dMB for recompiler instance data...\n", (sint32)(sizeof(PPCRecompilerInstanceData_t) / 1024 / 1024));
ppcRecompilerInstanceData = (PPCRecompilerInstanceData_t*)MemMapper::ReserveMemory(nullptr, sizeof(PPCRecompilerInstanceData_t), MemMapper::PAGE_PERMISSION::P_RW);
MemMapper::AllocateMemory(&(ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom), sizeof(PPCRecompilerInstanceData_t) - offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom), MemMapper::PAGE_PERMISSION::P_RW, true);
PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions();
uint32 codeRegionEnd = RPLLoader_GetMaxCodeOffset();
codeRegionEnd = (codeRegionEnd + PPC_REC_ALLOC_BLOCK_SIZE - 1) & ~(PPC_REC_ALLOC_BLOCK_SIZE - 1);
uint32 codeRegionSize = codeRegionEnd - PPC_REC_CODE_AREA_START;
forceLogDebug_printf("Allocating recompiler tables for range 0x%08x-0x%08x", PPC_REC_CODE_AREA_START, codeRegionEnd);
for (uint32 i = 0; i < codeRegionSize; i += PPC_REC_ALLOC_BLOCK_SIZE)
{
PPCRecompiler_reserveLookupTableBlock(i);
}
// init x64 recompiler instance data
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[0] = 1ULL << 63ULL;
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[1] = 0ULL;
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[0] = 1ULL << 63ULL;
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[1] = 1ULL << 63ULL;
ppcRecompilerInstanceData->_x64XMM_xorNOTMask[0] = 0xFFFFFFFFFFFFFFFFULL;
ppcRecompilerInstanceData->_x64XMM_xorNOTMask[1] = 0xFFFFFFFFFFFFFFFFULL;
ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[0] = ~(1ULL << 63ULL);
ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[1] = ~0ULL;
ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[0] = ~(1ULL << 63ULL);
ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[1] = ~(1ULL << 63ULL);
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[0] = ~(1 << 31);
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[1] = 0xFFFFFFFF;
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[2] = 0xFFFFFFFF;
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[3] = 0xFFFFFFFF;
ppcRecompilerInstanceData->_x64XMM_singleWordMask[0] = 0xFFFFFFFFULL;
ppcRecompilerInstanceData->_x64XMM_singleWordMask[1] = 0ULL;
ppcRecompilerInstanceData->_x64XMM_constDouble1_1[0] = 1.0;
ppcRecompilerInstanceData->_x64XMM_constDouble1_1[1] = 1.0;
ppcRecompilerInstanceData->_x64XMM_constDouble0_0[0] = 0.0;
ppcRecompilerInstanceData->_x64XMM_constDouble0_0[1] = 0.0;
ppcRecompilerInstanceData->_x64XMM_constFloat0_0[0] = 0.0f;
ppcRecompilerInstanceData->_x64XMM_constFloat0_0[1] = 0.0f;
ppcRecompilerInstanceData->_x64XMM_constFloat1_1[0] = 1.0f;
ppcRecompilerInstanceData->_x64XMM_constFloat1_1[1] = 1.0f;
*(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[0] = 0x00800000;
*(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[1] = 0x00800000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[0] = 0x7F800000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[1] = 0x7F800000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[2] = 0x7F800000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[3] = 0x7F800000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[0] = ~0x80000000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[1] = ~0x80000000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[2] = ~0x80000000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[3] = ~0x80000000;
// setup GQR scale tables
for (uint32 i = 0; i < 32; i++)
{
float a = 1.0f / (float)(1u << i);
float b = 0;
if (i == 0)
b = 4294967296.0f;
else
b = (float)(1u << (32u - i));
float ar = (float)(1u << i);
float br = 0;
if (i == 0)
br = 1.0f / 4294967296.0f;
else
br = 1.0f / (float)(1u << (32u - i));
ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[i * 2 + 0] = a;
ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[i * 2 + 1] = 1.0f;
ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[(i + 32) * 2 + 0] = b;
ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[(i + 32) * 2 + 1] = 1.0f;
ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[i * 2 + 0] = a;
ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[i * 2 + 1] = a;
ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[(i + 32) * 2 + 0] = b;
ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[(i + 32) * 2 + 1] = b;
ppcRecompilerInstanceData->_psq_st_scale_ps0_1[i * 2 + 0] = ar;
ppcRecompilerInstanceData->_psq_st_scale_ps0_1[i * 2 + 1] = 1.0f;
ppcRecompilerInstanceData->_psq_st_scale_ps0_1[(i + 32) * 2 + 0] = br;
ppcRecompilerInstanceData->_psq_st_scale_ps0_1[(i + 32) * 2 + 1] = 1.0f;
ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[i * 2 + 0] = ar;
ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[i * 2 + 1] = ar;
ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[(i + 32) * 2 + 0] = br;
ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[(i + 32) * 2 + 1] = br;
}
// mxcsr
ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOn = 0x1F80 | 0x8000;
ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOff = 0x1F80;
// query processor extensions
int cpuInfo[4];
__cpuid(cpuInfo, 0x80000001);
hasLZCNTSupport = ((cpuInfo[2] >> 5) & 1) != 0;
__cpuid(cpuInfo, 0x1);
hasMOVBESupport = ((cpuInfo[2] >> 22) & 1) != 0;
hasAVXSupport = ((cpuInfo[2] >> 28) & 1) != 0;
__cpuidex(cpuInfo, 0x7, 0);
hasBMI2Support = ((cpuInfo[1] >> 8) & 1) != 0;
forceLog_printf("Recompiler initialized. CPU extensions: %s%s%s", hasLZCNTSupport ? "LZCNT " : "", hasMOVBESupport ? "MOVBE " : "", hasAVXSupport ? "AVX " : "");
ppcRecompilerEnabled = true;
// launch recompilation thread
std::thread t_recompiler(PPCRecompiler_thread);
t_recompiler.detach();
}

View file

@ -0,0 +1,399 @@
#include <vector>
// address range of PPC code that the recompiler can handle, plus helper macros
#define PPC_REC_CODE_AREA_START (0x00000000) // lower bound of executable memory area. Recompiler expects this address to be 0
#define PPC_REC_CODE_AREA_END (0x10000000) // upper bound of executable memory area
#define PPC_REC_CODE_AREA_SIZE (PPC_REC_CODE_AREA_END - PPC_REC_CODE_AREA_START)
// rounds __v up to the next multiple of 4MB
#define PPC_REC_ALIGN_TO_4MB(__v) (((__v)+4*1024*1024-1)&~(4*1024*1024-1))
#define PPC_REC_MAX_VIRTUAL_GPR (40) // enough to store 32 GPRs + a few SPRs + temp registers (usually only 1-2)
// One contiguous PPC address range belonging to a recompiled function
typedef struct
{
uint32 ppcAddress; // start address of the range
uint32 ppcSize; // size of the range in bytes
//void* x86Start;
//size_t x86Size;
void* storedRange; // handle returned by the range store when the range was registered, nullptr if not registered
}ppcRecRange_t;
// A recompiled function: the PPC range it was generated from and the resulting x86 code
typedef struct
{
uint32 ppcAddress; // PPC start address of the function
uint32 ppcSize; // ppc code size of function
void* x86Code; // pointer to x86 code
size_t x86Size; // size of the x86 code
std::vector<ppcRecRange_t> list_ranges; // PPC ranges covered by this function
}PPCRecFunction_t;
// bit flags for IML instructions
#define PPCREC_IML_OP_FLAG_SIGNEXTEND (1<<0)
#define PPCREC_IML_OP_FLAG_SWITCHENDIAN (1<<1)
#define PPCREC_IML_OP_FLAG_NOT_EXPANDED (1<<2) // set on single-precision load instructions to indicate that the value should not be expanded to double-precision
#define PPCREC_IML_OP_FLAG_UNUSED (1<<7) // used to mark instructions that are not used
// A single instruction of the recompiler's intermediate language (IML).
// The header fields are shared by all instructions; the anonymous union holds the operands.
// NOTE(review): which union member is valid presumably depends on 'type' - confirm against the code generator
typedef struct
{
uint8 type;
uint8 operation;
uint8 crRegister; // set to 0xFF if not set, not all IML instruction types support cr.
uint8 crMode; // only used when crRegister is valid, used to differentiate between various forms of condition flag set/clear behavior
uint32 crIgnoreMask; // bit set for every respective CR bit that doesn't need to be updated
uint32 associatedPPCAddress; // ppc address that is associated with this instruction
union
{
struct
{
uint8 _padding[7];
}padding;
struct
{
// R (op) A [update cr* in mode *]
uint8 registerResult;
uint8 registerA;
}op_r_r;
struct
{
// R = A (op) B [update cr* in mode *]
uint8 registerResult;
uint8 registerA;
uint8 registerB;
}op_r_r_r;
struct
{
// R = A (op) immS32 [update cr* in mode *]
uint8 registerResult;
uint8 registerA;
sint32 immS32;
}op_r_r_s32;
struct
{
// R/F = NAME or NAME = R/F
uint8 registerIndex;
uint8 copyWidth;
uint32 name;
uint8 flags;
}op_r_name;
struct
{
// R (op) s32 [update cr* in mode *]
uint8 registerIndex;
sint32 immS32;
}op_r_immS32;
struct
{
// jump mark
uint32 address;
uint8 flags;
}op_jumpmark;
struct
{
// macro operation with generic parameters
uint32 param;
uint32 param2;
uint16 paramU16;
}op_macro;
struct
{
uint32 jumpmarkAddress;
bool jumpAccordingToSegment; //PPCRecImlSegment_t* destinationSegment; // if set, this replaces jumpmarkAddress
uint8 condition; // only used when crRegisterIndex is 8 or above (update: Apparently only used to mark jumps without a condition? -> Cleanup)
uint8 crRegisterIndex;
uint8 crBitIndex;
bool bitMustBeSet;
}op_conditionalJump;
struct
{
// memory load/store
uint8 registerData;
uint8 registerMem;
uint8 registerMem2;
uint8 registerGQR;
uint8 copyWidth;
//uint8 flags;
struct
{
bool swapEndian : 1;
bool signExtend : 1;
bool notExpanded : 1; // for floats
}flags2;
uint8 mode; // transfer mode (copy width, ps0/ps1 behavior)
sint32 immS32;
}op_storeLoad;
struct
{
// memory to memory copy
struct
{
uint8 registerMem;
sint32 immS32;
}src;
struct
{
uint8 registerMem;
sint32 immS32;
}dst;
uint8 copyWidth;
}op_mem2mem;
struct
{
// FPR operation with one operand
uint8 registerResult;
uint8 registerOperand;
uint8 flags;
}op_fpr_r_r;
struct
{
// FPR operation with two operands
uint8 registerResult;
uint8 registerOperandA;
uint8 registerOperandB;
uint8 flags;
}op_fpr_r_r_r;
struct
{
// FPR operation with three operands
uint8 registerResult;
uint8 registerOperandA;
uint8 registerOperandB;
uint8 registerOperandC;
uint8 flags;
}op_fpr_r_r_r_r;
struct
{
// FPR operation on a single register
uint8 registerResult;
//uint8 flags;
}op_fpr_r;
struct
{
// entry point from PPC into the generated code
uint32 ppcAddress;
uint32 x64Offset;
}op_ppcEnter;
struct
{
uint8 crD; // crBitIndex (result)
uint8 crA; // crBitIndex
uint8 crB; // crBitIndex
}op_cr;
// conditional operations (emitted if supported by target platform)
struct
{
// r_s32
uint8 registerIndex;
sint32 immS32;
// condition
uint8 crRegisterIndex;
uint8 crBitIndex;
bool bitMustBeSet;
}op_conditional_r_s32;
};
}PPCRecImlInstruction_t;
// forward declaration, the segment type is defined further below
typedef struct _PPCRecImlSegment_t PPCRecImlSegment_t;
// A position (segment + index) inside the IML code; nodes are chained into a doubly linked list via next/prev
typedef struct _ppcRecompilerSegmentPoint_t
{
sint32 index;
PPCRecImlSegment_t* imlSegment;
_ppcRecompilerSegmentPoint_t* next;
_ppcRecompilerSegmentPoint_t* prev;
}ppcRecompilerSegmentPoint_t;
// A single location where a register is accessed, with flags for read and write access
struct raLivenessLocation_t
{
sint32 index;
bool isRead;
bool isWrite;
// note: the default constructor leaves all members uninitialized
raLivenessLocation_t() {};
raLivenessLocation_t(sint32 index, bool isRead, bool isWrite)
: index(index), isRead(isRead), isWrite(isWrite) {};
};
// Intrusive doubly linked list node used to chain subranges
struct raLivenessSubrangeLink_t
{
struct raLivenessSubrange_t* prev;
struct raLivenessSubrange_t* next;
};
// The part of a liveness range that lies within one IML segment
struct raLivenessSubrange_t
{
struct raLivenessRange_t* range; // range this subrange belongs to
PPCRecImlSegment_t* imlSegment; // segment this subrange belongs to
ppcRecompilerSegmentPoint_t start;
ppcRecompilerSegmentPoint_t end;
// dirty state tracking
bool _noLoad;
bool hasStore;
bool hasStoreDelayed;
// next
raLivenessSubrange_t* subrangeBranchTaken;
raLivenessSubrange_t* subrangeBranchNotTaken;
// processing
uint32 lastIterationIndex;
// instruction locations
std::vector<raLivenessLocation_t> list_locations;
// linked list (subranges with same GPR virtual register)
raLivenessSubrangeLink_t link_sameVirtualRegisterGPR;
// linked list (all subranges for this segment)
raLivenessSubrangeLink_t link_segmentSubrangesGPR;
};
// Liveness range of one virtual register, composed of one or more subranges
struct raLivenessRange_t
{
sint32 virtualRegister;
sint32 physicalRegister;
sint32 name;
std::vector<raLivenessSubrange_t*> list_subranges;
};
// Per-segment state of the register allocator; all members are value-initialized
struct PPCSegmentRegisterAllocatorInfo_t
{
// analyzer stage
bool isPartOfProcessedLoop{}; // used during loop detection
sint32 lastIterationIndex{};
// linked lists
raLivenessSubrange_t* linkedList_allSubranges{};
raLivenessSubrange_t* linkedList_perVirtualGPR[PPC_REC_MAX_VIRTUAL_GPR]{}; // one list head per virtual GPR
};
// Usage start/end and a processed flag for every virtual GPR
struct PPCRecVGPRDistances_t
{
struct _RegArrayEntry
{
sint32 usageStart{};
sint32 usageEnd{};
}reg[PPC_REC_MAX_VIRTUAL_GPR];
bool isProcessed[PPC_REC_MAX_VIRTUAL_GPR]{};
};
// A segment of IML code together with its links to neighbouring segments and per-segment analysis data.
// All members are value-initialized.
typedef struct _PPCRecImlSegment_t
{
sint32 momentaryIndex{}; // index in segment list, generally not kept up to date except if needed (necessary for loop detection)
sint32 startOffset{}; // offset to first instruction in iml instruction list
sint32 count{}; // number of instructions in segment
uint32 ppcAddress{}; // ppc address (0xFFFFFFFF if not associated with an address)
uint32 x64Offset{}; // x64 code offset of segment start
uint32 cycleCount{}; // number of PPC cycles required to execute this segment (roughly)
// list of intermediate instructions in this segment
PPCRecImlInstruction_t* imlList{};
sint32 imlListSize{};
sint32 imlListCount{};
// segment link
_PPCRecImlSegment_t* nextSegmentBranchNotTaken{}; // this is also the default for segments where there is no branch
_PPCRecImlSegment_t* nextSegmentBranchTaken{};
bool nextSegmentIsUncertain{};
sint32 loopDepth{};
//sList_t* list_prevSegments;
std::vector<_PPCRecImlSegment_t*> list_prevSegments{};
// PPC range of segment
uint32 ppcAddrMin{};
uint32 ppcAddrMax{};
// enterable segments
bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary)
uint32 enterPPCAddress{}; // used if isEnterable is true
// jump destination segments
bool isJumpDestination{}; // segment is a destination for one or more (conditional) jumps
uint32 jumpDestinationPPCAddress{};
// PPC FPR use mask
bool ppcFPRUsed[32]{}; // one flag per PPC FPR
// CR use mask
uint32 crBitsInput{}; // bits that are expected to be set from the previous segment (read in this segment but not overwritten)
uint32 crBitsRead{}; // all bits that are read in this segment
uint32 crBitsWritten{}; // bits that are written in this segment
// register allocator info
PPCSegmentRegisterAllocatorInfo_t raInfo{};
PPCRecVGPRDistances_t raDistances{};
bool raRangeExtendProcessed{};
// segment points
ppcRecompilerSegmentPoint_t* segmentPointList{};
}PPCRecImlSegment_t;
// Working state used while a PPC function is translated to IML and then to x64 code
struct ppcImlGenContext_t
{
PPCRecFunction_t* functionRef; // function that is being generated
uint32* currentInstruction;
uint32 ppcAddressOfCurrentInstruction;
// fpr mode
bool LSQE{ true };
bool PSE{ true };
// cycle counter
uint32 cyclesSinceLastBranch; // used to track ppc cycles
// temporary general purpose registers
uint32 mappedRegister[PPC_REC_MAX_VIRTUAL_GPR];
// temporary floating point registers (single and double precision)
uint32 mappedFPRRegister[256];
// list of intermediate instructions
PPCRecImlInstruction_t* imlList;
sint32 imlListSize;
sint32 imlListCount;
// list of segments
PPCRecImlSegment_t** segmentList;
sint32 segmentListSize;
sint32 segmentListCount;
// code generation control
bool hasFPUInstruction; // if true, PPCEnter macro will create FP_UNAVAIL checks -> Not needed in user mode
// register allocator info
struct
{
std::vector<raLivenessRange_t*> list_ranges;
}raInfo;
// analysis info
struct
{
bool modifiesGQR[8];
}tracking;
};
// type of the entries in the direct jump table
typedef void ATTR_MS_ABI (*PPCREC_JUMP_ENTRY)();
// Memory block holding the lookup tables and the constants used by the recompiler.
// Both tables are indexed by (PPC address / 4).
typedef struct
{
PPCRecFunction_t* ppcRecompilerFuncTable[PPC_REC_ALIGN_TO_4MB(PPC_REC_CODE_AREA_SIZE/4)]; // one function pointer for each potential ppc instruction
PPCREC_JUMP_ENTRY ppcRecompilerDirectJumpTable[PPC_REC_ALIGN_TO_4MB(PPC_REC_CODE_AREA_SIZE/4)]; // lookup table for ppc offset to native code function
// x64 data
// 16-byte aligned constants
uint64 __declspec(align(16)) _x64XMM_xorNegateMaskBottom[2];
uint64 __declspec(align(16)) _x64XMM_xorNegateMaskPair[2];
uint64 __declspec(align(16)) _x64XMM_xorNOTMask[2];
uint64 __declspec(align(16)) _x64XMM_andAbsMaskBottom[2];
uint64 __declspec(align(16)) _x64XMM_andAbsMaskPair[2];
uint32 __declspec(align(16)) _x64XMM_andFloatAbsMaskBottom[4];
uint64 __declspec(align(16)) _x64XMM_singleWordMask[2];
double __declspec(align(16)) _x64XMM_constDouble1_1[2];
double __declspec(align(16)) _x64XMM_constDouble0_0[2];
float __declspec(align(16)) _x64XMM_constFloat0_0[2];
float __declspec(align(16)) _x64XMM_constFloat1_1[2];
float __declspec(align(16)) _x64XMM_constFloatMin[2];
uint32 __declspec(align(16)) _x64XMM_flushDenormalMask1[4];
uint32 __declspec(align(16)) _x64XMM_flushDenormalMaskResetSignBits[4];
// PSQ load/store scale tables
// 64 scale entries with two values each
double _psq_ld_scale_ps0_ps1[64 * 2];
double _psq_ld_scale_ps0_1[64 * 2];
double _psq_st_scale_ps0_ps1[64 * 2];
double _psq_st_scale_ps0_1[64 * 2];
// MXCSR
uint32 _x64XMM_mxCsr_ftzOn;
uint32 _x64XMM_mxCsr_ftzOff;
}PPCRecompilerInstanceData_t;
// global recompiler state
extern __declspec(dllexport) PPCRecompilerInstanceData_t* ppcRecompilerInstanceData;
extern bool ppcRecompilerEnabled;
// setup and memory management
__declspec(dllexport) void PPCRecompiler_init();
void PPCRecompiler_allocateRange(uint32 startAddress, uint32 size);
DLLEXPORT void PPCRecompiler_invalidateRange(uint32 startAddr, uint32 endAddr);
// enter/leave stubs of the generated code
extern void ATTR_MS_ABI (*PPCRecompiler_enterRecompilerCode)(uint64 codeMem, uint64 ppcInterpreterInstance);
extern void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_visited)();
extern void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_unvisited)();
#define PPC_REC_INVALID_FUNCTION ((PPCRecFunction_t*)-1)
// CPUID
extern __declspec(dllexport) bool hasLZCNTSupport;
extern __declspec(dllexport) bool hasMOVBESupport;
extern __declspec(dllexport) bool hasBMI2Support;
extern __declspec(dllexport) bool hasAVXSupport;
// todo - move some of the stuff above into PPCRecompilerInternal.h
// recompiler interface
void PPCRecompiler_recompileIfUnvisited(uint32 enterAddress);
void PPCRecompiler_attemptEnter(struct PPCInterpreter_t* hCPU, uint32 enterAddress);
void PPCRecompiler_attemptEnterWithoutRecompile(struct PPCInterpreter_t* hCPU, uint32 enterAddress);

View file

@ -0,0 +1,422 @@
#define PPCREC_CR_REG_TEMP 8 // there are only 8 cr registers (0-7); index 8 is used as a temporary cr register that is never stored (e.g. for the BDNZ instruction)
enum
{
PPCREC_IML_OP_ASSIGN, // '=' operator
PPCREC_IML_OP_ENDIAN_SWAP, // '=' operator with 32bit endian swap
PPCREC_IML_OP_ADD, // '+' operator
PPCREC_IML_OP_SUB, // '-' operator
PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, // complex operation, result = operand + ~operand2 + carry bit, updates carry bit
PPCREC_IML_OP_COMPARE_SIGNED, // arithmetic/signed comparison operator (updates cr)
PPCREC_IML_OP_COMPARE_UNSIGNED, // logical/unsigned comparison operator (updates cr)
PPCREC_IML_OP_MULTIPLY_SIGNED, // '*' operator (signed multiply)
PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, // unsigned 64bit multiply, store only high 32bit-word of result
PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, // signed 64bit multiply, store only high 32bit-word of result
PPCREC_IML_OP_DIVIDE_SIGNED, // '/' operator (signed divide)
PPCREC_IML_OP_DIVIDE_UNSIGNED, // '/' operator (unsigned divide)
PPCREC_IML_OP_ADD_CARRY, // complex operation, result = operand + carry bit, updates carry bit
PPCREC_IML_OP_ADD_CARRY_ME, // complex operation, result = operand + carry bit + (-1), updates carry bit
PPCREC_IML_OP_ADD_UPDATE_CARRY, // '+' operator but also updates carry flag
PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, // '+' operator and also adds carry, updates carry flag
// assign operators with cast
PPCREC_IML_OP_ASSIGN_S16_TO_S32, // copy 16bit and sign extend
PPCREC_IML_OP_ASSIGN_S8_TO_S32, // copy 8bit and sign extend
// binary operation
PPCREC_IML_OP_OR, // '|' operator
PPCREC_IML_OP_ORC, // '|' operator, second operand is complemented first
PPCREC_IML_OP_AND, // '&' operator
PPCREC_IML_OP_XOR, // '^' operator
PPCREC_IML_OP_LEFT_ROTATE, // left rotate operator
PPCREC_IML_OP_LEFT_SHIFT, // shift left operator
PPCREC_IML_OP_RIGHT_SHIFT, // right shift operator (unsigned)
PPCREC_IML_OP_NOT, // complement each bit
PPCREC_IML_OP_NEG, // negate
// ppc
PPCREC_IML_OP_RLWIMI, // RLWIMI instruction (rotate, merge based on mask)
PPCREC_IML_OP_SRAW, // SRAWI/SRAW instruction (algebraic shift right, sets ca flag)
PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits)
PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits)
PPCREC_IML_OP_CNTLZW,
PPCREC_IML_OP_SUBFC, // SUBFC and SUBFIC (subtract from and set carry)
PPCREC_IML_OP_DCBZ, // clear 32 bytes aligned to 0x20
PPCREC_IML_OP_MFCR, // copy cr to gpr
PPCREC_IML_OP_MTCRF, // copy gpr to cr (with mask)
// condition register
PPCREC_IML_OP_CR_CLEAR, // clear cr bit
PPCREC_IML_OP_CR_SET, // set cr bit
PPCREC_IML_OP_CR_OR, // OR cr bits
PPCREC_IML_OP_CR_ORC, // OR cr bits, complement second input operand bit first
PPCREC_IML_OP_CR_AND, // AND cr bits
PPCREC_IML_OP_CR_ANDC, // AND cr bits, complement second input operand bit first
// FPU
PPCREC_IML_OP_FPR_ADD_BOTTOM,
PPCREC_IML_OP_FPR_ADD_PAIR,
PPCREC_IML_OP_FPR_SUB_PAIR,
PPCREC_IML_OP_FPR_SUB_BOTTOM,
PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM,
PPCREC_IML_OP_FPR_MULTIPLY_PAIR,
PPCREC_IML_OP_FPR_DIVIDE_BOTTOM,
PPCREC_IML_OP_FPR_DIVIDE_PAIR,
PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP,
PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP,
PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM,
PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP, // leave bottom of destination untouched
PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, // leave bottom of destination untouched
PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM, // leave top of destination untouched
PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED,
PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, // expand bottom f32 to f64 in bottom and top half
PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP, // calculate reciprocal with Espresso accuracy of source bottom half and write result to destination bottom and top half
PPCREC_IML_OP_FPR_FCMPO_BOTTOM,
PPCREC_IML_OP_FPR_FCMPU_BOTTOM,
PPCREC_IML_OP_FPR_FCMPU_TOP,
PPCREC_IML_OP_FPR_NEGATE_BOTTOM,
PPCREC_IML_OP_FPR_NEGATE_PAIR,
PPCREC_IML_OP_FPR_ABS_BOTTOM, // abs(fp0)
PPCREC_IML_OP_FPR_ABS_PAIR,
PPCREC_IML_OP_FPR_FRES_PAIR, // 1.0/fp approx (Espresso accuracy)
PPCREC_IML_OP_FPR_FRSQRTE_PAIR, // 1.0/sqrt(fp) approx (Espresso accuracy)
PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM, // -abs(fp0)
PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, // round 64bit double to 64bit double with 32bit float precision (in bottom half of xmm register)
PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR, // round two 64bit doubles to 64bit double with 32bit float precision
PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT,
PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ,
PPCREC_IML_OP_FPR_SELECT_BOTTOM, // selectively copy bottom value from operand B or C based on value in operand A
PPCREC_IML_OP_FPR_SELECT_PAIR, // selectively copy top/bottom from operand B or C based on value in top/bottom of operand A
// PS
PPCREC_IML_OP_FPR_SUM0,
PPCREC_IML_OP_FPR_SUM1,
};
#define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN)
// Macro identifiers for PPCREC_IML_TYPE_MACRO instructions (unnamed enum, values are assigned by position)
enum
{
PPCREC_IML_MACRO_BLR, // macro for BLR instruction code
PPCREC_IML_MACRO_BLRL, // macro for BLRL instruction code
PPCREC_IML_MACRO_BCTR, // macro for BCTR instruction code
PPCREC_IML_MACRO_BCTRL, // macro for BCTRL instruction code
PPCREC_IML_MACRO_BL, // call to different function (can be within same function)
PPCREC_IML_MACRO_B_FAR, // branch to different function
PPCREC_IML_MACRO_COUNT_CYCLES, // decrease current remaining thread cycles by a certain amount
PPCREC_IML_MACRO_HLE, // HLE function call
PPCREC_IML_MACRO_MFTB, // get TB register value (low or high)
PPCREC_IML_MACRO_LEAVE, // leaves recompiler and switches to interpreter
// debugging
PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak
};
// Conditions for conditional jump instructions (stored in op_conditionalJump.condition)
enum
{
PPCREC_JUMP_CONDITION_NONE, // the CR tracking analysis records no CR read for jumps with this condition
PPCREC_JUMP_CONDITION_E, // equal / zero
PPCREC_JUMP_CONDITION_NE, // not equal / not zero
PPCREC_JUMP_CONDITION_LE, // less or equal
PPCREC_JUMP_CONDITION_L, // less
PPCREC_JUMP_CONDITION_GE, // greater or equal
PPCREC_JUMP_CONDITION_G, // greater
// special case:
PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW, // needs special handling
PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW, // not summaryoverflow
};
// Modes describing how an instruction updates its condition register (passed as crMode to the instruction generators)
enum
{
PPCREC_CR_MODE_COMPARE_SIGNED,
PPCREC_CR_MODE_COMPARE_UNSIGNED, // alias logic compare
// others:
PPCREC_CR_MODE_ARITHMETIC, // arithmetic use (for use with add/sub instructions without generating extra code)
PPCREC_CR_MODE_LOGICAL, // NOTE(review): not described here - presumably the counterpart of ARITHMETIC for logical instructions, confirm
};
// Instruction types of IML instructions (compared against the 'type' field of an instruction).
// The type selects the operand layout, the operation code selects what is done with the operands.
enum
{
PPCREC_IML_TYPE_NONE,
PPCREC_IML_TYPE_NO_OP, // no-op instruction
PPCREC_IML_TYPE_JUMPMARK, // possible jump destination (generated before each ppc instruction)
PPCREC_IML_TYPE_R_R, // r* (op) *r
PPCREC_IML_TYPE_R_R_R, // r* = r* (op) r*
PPCREC_IML_TYPE_R_R_S32, // r* = r* (op) s32*
PPCREC_IML_TYPE_LOAD, // r* = [r*+s32*]
PPCREC_IML_TYPE_LOAD_INDEXED, // r* = [r*+r*]
PPCREC_IML_TYPE_STORE, // [r*+s32*] = r*
PPCREC_IML_TYPE_STORE_INDEXED, // [r*+r*] = r*
PPCREC_IML_TYPE_R_NAME, // r* = name
PPCREC_IML_TYPE_NAME_R, // name* = r*
PPCREC_IML_TYPE_R_S32, // r* (op) imm
PPCREC_IML_TYPE_MACRO,
PPCREC_IML_TYPE_CJUMP, // conditional jump
PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles >= 0
PPCREC_IML_TYPE_PPC_ENTER, // used to mark locations that should be written to recompilerCallTable
PPCREC_IML_TYPE_CR, // condition register specific operations (one or more operands)
// conditional
PPCREC_IML_TYPE_CONDITIONAL_R_S32,
// FPR
// NOTE(review): the two comments below read reversed compared to PPCREC_IML_TYPE_R_NAME / PPCREC_IML_TYPE_NAME_R above - presumably FPR_R_NAME is 'f* = name' and FPR_NAME_R is 'name = f*', confirm against the code generator
PPCREC_IML_TYPE_FPR_R_NAME, // name = f*
PPCREC_IML_TYPE_FPR_NAME_R, // f* = name
PPCREC_IML_TYPE_FPR_LOAD, // r* = (bitdepth) [r*+s32*] (single or paired single mode)
PPCREC_IML_TYPE_FPR_LOAD_INDEXED, // r* = (bitdepth) [r*+r*] (single or paired single mode)
PPCREC_IML_TYPE_FPR_STORE, // (bitdepth) [r*+s32*] = r* (single or paired single mode)
PPCREC_IML_TYPE_FPR_STORE_INDEXED, // (bitdepth) [r*+r*] = r* (single or paired single mode)
PPCREC_IML_TYPE_FPR_R_R,
PPCREC_IML_TYPE_FPR_R_R_R,
PPCREC_IML_TYPE_FPR_R_R_R_R,
PPCREC_IML_TYPE_FPR_R,
// special
PPCREC_IML_TYPE_MEM2MEM, // memory to memory copy (deprecated)
};
// Name identifiers (the 'name'/'mappedName' values handed to the register management functions).
// NOTE(review): the *0 entries look like base values to which a register index is added - confirm against the callers
enum
{
PPCREC_NAME_NONE,
PPCREC_NAME_TEMPORARY,
PPCREC_NAME_R0 = 1000,
PPCREC_NAME_SPR0 = 2000,
PPCREC_NAME_FPR0 = 3000,
PPCREC_NAME_TEMPORARY_FPR0 = 4000, // 0 to 7
// disabled entry, note that the value 3000 is already taken by PPCREC_NAME_FPR0 and would have to change before re-enabling it
//PPCREC_NAME_CR0 = 3000, // value mapped condition register (usually it isn't needed and can be optimized away)
};
// special cases for LOAD/STORE
// These markers are compared against the copyWidth field of load/store instructions (see the STWCX check in the CR tracking analysis).
// The load and the store markers share the same numeric values, so a marker is only meaningful together with the instruction type.
#define PPC_REC_LOAD_LWARX_MARKER (100) // lwarx instruction (similar to LWZX but sets reserved address/value)
#define PPC_REC_STORE_STWCX_MARKER (100) // stwcx instruction (similar to STWX but writes only if reservation from LWARX is valid)
#define PPC_REC_STORE_STSWI_1 (200) // stswi nb = 1
#define PPC_REC_STORE_STSWI_2 (201) // stswi nb = 2
#define PPC_REC_STORE_STSWI_3 (202) // stswi nb = 3
// note: the lswi markers below carry STORE in their name although they are commented as lswi
#define PPC_REC_STORE_LSWI_1 (200) // lswi nb = 1
#define PPC_REC_STORE_LSWI_2 (201) // lswi nb = 2
#define PPC_REC_STORE_LSWI_3 (202) // lswi nb = 3
// default value of the optional crRegister parameters of the instruction generators
#define PPC_REC_INVALID_REGISTER 0xFF
// bit indices within one 4-bit cr register (used as 'crRegisterIndex * 4 + bit index' by the CR tracking analysis)
#define PPCREC_CR_BIT_LT 0
#define PPCREC_CR_BIT_GT 1
#define PPCREC_CR_BIT_EQ 2
#define PPCREC_CR_BIT_SO 3
// Modes for FPR load and store instructions.
// Load and store modes live in the same unnamed enum, so the store modes continue the numbering after the last load mode.
enum
{
// fpr load
PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0,
PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1,
PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0,
PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0,
PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1,
PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0,
PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1,
PPCREC_FPR_LD_MODE_PSQ_S16_PS0,
PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1,
PPCREC_FPR_LD_MODE_PSQ_U16_PS0,
PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1,
PPCREC_FPR_LD_MODE_PSQ_S8_PS0,
PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1,
PPCREC_FPR_LD_MODE_PSQ_U8_PS0,
PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1,
// fpr store
PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, // store 1 single precision float from ps0
PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, // store 1 double precision float from ps0
PPCREC_FPR_ST_MODE_UI32_FROM_PS0, // store raw low-32bit of PS0
PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1,
PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0,
PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1,
PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0,
PPCREC_FPR_ST_MODE_PSQ_S8_PS0,
PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1,
PPCREC_FPR_ST_MODE_PSQ_U8_PS0,
PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1,
PPCREC_FPR_ST_MODE_PSQ_U16_PS0,
PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1,
PPCREC_FPR_ST_MODE_PSQ_S16_PS0,
PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1,
};
// IML generation entry point and context cleanup
bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set<uint32>& entryAddresses);
void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext); // todo - move to destructor
// instruction list and segment manipulation
PPCRecImlInstruction_t* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_pushBackIMLInstructions(PPCRecImlSegment_t* imlSegment, sint32 index, sint32 shiftBackCount);
PPCRecImlInstruction_t* PPCRecompiler_insertInstruction(PPCRecImlSegment_t* imlSegment, sint32 index);
void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint32 index, sint32 count);
// segment points reference a position (segment + instruction index), they are used e.g. for the start/end of liveness subranges
void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint, PPCRecImlSegment_t* imlSegment, sint32 index);
void PPCRecompilerIml_removeSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint);
// GPR register management
uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false);
uint32 PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);
// FPR register management
uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false);
uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);
// IML instruction generation
void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 jumpmarkAddress);
void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction);
void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 copyWidth, bool signExtend, bool bigEndian, uint8 crRegister, uint32 crMode);
void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet);
void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0);
// IML instruction generation (new style, can generate new instructions but also overwrite existing ones)
// NOTE(review): presumably a null imlInstruction requests a new instruction while a non-null one is overwritten in place - confirm against the definitions
void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction);
void PPCRecompilerImlGen_generateNewInstruction_memory_memory(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint8 srcMemReg, sint32 srcImmS32, uint8 dstMemReg, sint32 dstImmS32, uint8 copyWidth);
void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister = PPC_REC_INVALID_REGISTER);
// IML generation - FPU
// One generator per floating point / paired single PPC instruction. Each one receives the generation context and the 32bit opcode.
// NOTE(review): the bool result presumably tells the caller whether the opcode could be translated - confirm against the definitions
// load/store
bool PPCRecompilerImlGen_LFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFIWX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
// arithmetic
bool PPCRecompilerImlGen_FADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FMUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FDIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FNMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FMULS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FDIVS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FMADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FNMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
// compare, move and misc
bool PPCRecompilerImlGen_FCMPO(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FCMPU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FMR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FNABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FRES(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FRSP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FNEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FSEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FCTIWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
// paired single (PSQ_* / PS_*)
bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_SUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_DIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_ABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_RES(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_RSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_SEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MERGE00(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MERGE01(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MERGE10(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MERGE11(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
// IML general
bool PPCRecompiler_isSuffixInstruction(PPCRecImlInstruction_t* iml);
// segment linking: every segment has a 'branch taken' and a 'branch not taken' successor link
void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompilerIml_setLinkBranchNotTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst);
void PPCRecompilerIml_setLinkBranchTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst);
void PPCRecompilerIML_relinkInputSegment(PPCRecImlSegment_t* imlSegmentOrig, PPCRecImlSegment_t* imlSegmentNew);
void PPCRecompilerIML_removeLink(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst);
void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext);
PPCRecImlInstruction_t* PPCRecompilerIML_getLastInstruction(PPCRecImlSegment_t* imlSegment);
// IML analyzer
// Result of PPCRecompilerImlAnalyzer_getCRTracking: bit masks of the cr bits an instruction reads and writes.
// Bit (crRegisterIndex * 4 + crBitIndex) stands for one cr bit, e.g. the bits 0-3 are cr0.
typedef struct
{
uint32 readCRBits; // cr bits read by the instruction
uint32 writtenCRBits; // cr bits overwritten by the instruction
}PPCRecCRTracking_t;
// returns true if the segment branches back to itself and is assumed to terminate
bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment);
// returns true if instructions of this type can overwrite cr (depending on their crRegister value)
bool PPCRecompilerImlAnalyzer_canTypeWriteCR(PPCRecImlInstruction_t* imlInstruction);
// fills crTracking with the cr bits read and written by the instruction
void PPCRecompilerImlAnalyzer_getCRTracking(PPCRecImlInstruction_t* imlInstruction, PPCRecCRTracking_t* crTracking);
// IML optimizer
bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext);
bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext);
// IML register allocator
void PPCRecompilerImm_allocateRegisters(ppcImlGenContext_t* ppcImlGenContext);
// late optimizations
void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext);
// debug
void PPCRecompiler_dumpIMLSegment(PPCRecImlSegment_t* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false);
// Registers accessed by a single IML instruction, filled in by PPCRecompiler_checkRegisterUsage.
// Callers treat a negative value as 'slot not used' (see the writtenNamedReg1 < 0 check in the tight loop analysis).
// Each group can be accessed either through the named members or through the array that shares their storage.
typedef struct
{
// GPR
union
{
struct
{
sint16 readNamedReg1;
sint16 readNamedReg2;
sint16 readNamedReg3;
sint16 writtenNamedReg1;
};
sint16 gpr[4]; // 3 read + 1 write
};
// FPR
union
{
struct
{
// note: If destination operand is not fully written, it will be added as a read FPR as well
sint16 readFPR1;
sint16 readFPR2;
sint16 readFPR3;
sint16 readFPR4; // usually this is set to the result FPR if only partially overwritten
sint16 writtenFPR1;
};
// NOTE(review): this array has 4 entries while the struct above has 5 members, so fpr[] only overlays the four read
// slots and writtenFPR1 is not reachable through it (unlike gpr[], which includes the written register) - confirm this is intended
sint16 fpr[4];
};
}PPCImlOptimizerUsedRegisters_t;
// determines the registers read and written by imlInstruction and stores them in registersUsed
// (the tight loop analysis passes NULL for ppcImlGenContext)
void PPCRecompiler_checkRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, PPCImlOptimizerUsedRegisters_t* registersUsed);

View file

@ -0,0 +1,137 @@
#include "PPCRecompiler.h"
#include "PPCRecompilerIml.h"
#include "util/helpers/fixedSizeList.h"
#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
/*
* Analyzes a single segment and returns true if it is a tight loop (a segment that branches back to itself) which is assumed to be finite
*/
bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment)
{
	// a tight loop has to jump back to the beginning of its own segment
	if (imlSegment->nextSegmentBranchTaken != imlSegment)
		return false;
	// matches 'r (op) imm' instructions which add or subtract an immediate
	const auto isImmediateAddOrSub = [](const auto& inst) -> bool
	{
		if (inst.type != PPCREC_IML_TYPE_R_S32)
			return false;
		return inst.operation == PPCREC_IML_OP_ADD || inst.operation == PPCREC_IML_OP_SUB;
	};
	// BDNZ style loops decrement a counter and update the temporary cr register (index 8), they are always treated as finite
	for (sint32 i = 0; i < imlSegment->imlListCount; i++)
	{
		const auto& inst = imlSegment->imlList[i];
		if (inst.type == PPCREC_IML_TYPE_R_S32 && inst.operation == PPCREC_IML_OP_SUB && inst.crRegister == 8)
			return true;
	}
	// other loops: heuristic based on common patterns
	// Collect every register that is the target of an immediate ADD/SUB (r_r_s32 ADD/SUB is not considered). Such a register is
	// assumed to eventually overflow and thereby end the loop. This is risky, but it covers most loops that use load-update and
	// store-update instructions as well as loops with decrementing counters
	FixedSizeList<sint32, 64, true> candidateRegisters;
	for (sint32 i = 0; i < imlSegment->imlListCount; i++)
	{
		const auto& inst = imlSegment->imlList[i];
		if (isImmediateAddOrSub(inst))
			candidateRegisters.addUnique(inst.op_r_immS32.registerIndex);
	}
	if (candidateRegisters.count == 0)
		return false;
	// drop every candidate that is also written by any instruction other than an immediate ADD/SUB
	// todo: We should also cover the case where ADD+SUB on the same register cancel the effect out
	PPCImlOptimizerUsedRegisters_t usage;
	for (sint32 i = 0; i < imlSegment->imlListCount; i++)
	{
		if (isImmediateAddOrSub(imlSegment->imlList[i]))
			continue;
		PPCRecompiler_checkRegisterUsage(nullptr, &imlSegment->imlList[i], &usage);
		if (usage.writtenNamedReg1 >= 0)
			candidateRegisters.remove(usage.writtenNamedReg1);
	}
	// finite if at least one register is modified by immediate ADD/SUB only
	return candidateRegisters.count > 0;
}
/*
* Tells whether instructions of the given type are able to overwrite CR.
* Whether a particular instruction actually does so depends on its ->crRegister value, which is not checked here
*/
bool PPCRecompilerImlAnalyzer_canTypeWriteCR(PPCRecImlInstruction_t* imlInstruction)
{
	switch (imlInstruction->type)
	{
	// integer register operations
	case PPCREC_IML_TYPE_R_R:
	case PPCREC_IML_TYPE_R_R_R:
	case PPCREC_IML_TYPE_R_R_S32:
	case PPCREC_IML_TYPE_R_S32:
	// floating point register operations
	case PPCREC_IML_TYPE_FPR_R_R:
	case PPCREC_IML_TYPE_FPR_R_R_R:
	case PPCREC_IML_TYPE_FPR_R_R_R_R:
	case PPCREC_IML_TYPE_FPR_R:
		return true;
	default:
		return false;
	}
}
void PPCRecompilerImlAnalyzer_getCRTracking(PPCRecImlInstruction_t* imlInstruction, PPCRecCRTracking_t* crTracking)
{
crTracking->readCRBits = 0;
crTracking->writtenCRBits = 0;
if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP)
{
if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE)
{
uint32 crBitFlag = 1 << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + imlInstruction->op_conditionalJump.crBitIndex);
crTracking->readCRBits = (crBitFlag);
}
}
else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
{
uint32 crBitFlag = 1 << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex);
crTracking->readCRBits = crBitFlag;
}
else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR)
{
crTracking->readCRBits = 0xFFFFFFFF;
}
else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF)
{
crTracking->writtenCRBits |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32);
}
else if (imlInstruction->type == PPCREC_IML_TYPE_CR)
{
if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR ||
imlInstruction->operation == PPCREC_IML_OP_CR_SET)
{
uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD);
crTracking->writtenCRBits = crBitFlag;
}
else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR ||
imlInstruction->operation == PPCREC_IML_OP_CR_ORC ||
imlInstruction->operation == PPCREC_IML_OP_CR_AND ||
imlInstruction->operation == PPCREC_IML_OP_CR_ANDC)
{
uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD);
crTracking->writtenCRBits = crBitFlag;
crBitFlag = 1 << (imlInstruction->op_cr.crA);
crTracking->readCRBits = crBitFlag;
crBitFlag = 1 << (imlInstruction->op_cr.crB);
crTracking->readCRBits |= crBitFlag;
}
else
assert_dbg();
}
else if (PPCRecompilerImlAnalyzer_canTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7)
{
crTracking->writtenCRBits |= (0xF << (imlInstruction->crRegister * 4));
}
else if ((imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER)
{
// overwrites CR0
crTracking->writtenCRBits |= (0xF << 0);
}
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,399 @@
#include "PPCRecompiler.h"
#include "PPCRecompilerIml.h"
#include "PPCRecompilerX64.h"
#include "PPCRecompilerImlRanges.h"
#include "util/helpers/MemoryPool.h"
// Inserts subrange at the front of the doubly linked list of subranges that share the same virtual register
// root points to the list head and is updated to the inserted subrange
void PPCRecRARange_addLink_perVirtualGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange)
{
	raLivenessSubrange_t* const previousHead = *root;
#ifndef PUBLIC_RELEASE
	// every entry of this list has to belong to the same virtual register
	if (previousHead && previousHead->range->virtualRegister != subrange->range->virtualRegister)
		assert_dbg();
#endif
	subrange->link_sameVirtualRegisterGPR.next = previousHead;
	if (previousHead)
		previousHead->link_sameVirtualRegisterGPR.prev = subrange;
	// the new head has no predecessor
	subrange->link_sameVirtualRegisterGPR.prev = nullptr;
	*root = subrange;
}
// Inserts subrange at the front of the doubly linked list that holds all subranges of a segment
// root points to the list head and is updated to the inserted subrange
void PPCRecRARange_addLink_allSubrangesGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange)
{
	raLivenessSubrange_t* const previousHead = *root;
	subrange->link_segmentSubrangesGPR.next = previousHead;
	if (previousHead)
		previousHead->link_segmentSubrangesGPR.prev = subrange;
	// the new head has no predecessor
	subrange->link_segmentSubrangesGPR.prev = nullptr;
	*root = subrange;
}
// Removes subrange from the doubly linked list of subranges that share the same virtual register
// root points to the list head and is updated if subrange was the first entry
void PPCRecRARange_removeLink_perVirtualGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange)
{
	raLivenessSubrange_t* const predecessor = subrange->link_sameVirtualRegisterGPR.prev;
	raLivenessSubrange_t* const successor = subrange->link_sameVirtualRegisterGPR.next;
	// bypass subrange in forward direction (or move the head if there is no predecessor)
	if (predecessor)
		predecessor->link_sameVirtualRegisterGPR.next = successor;
	else
		(*root) = successor;
	// bypass subrange in backward direction
	if (successor)
		successor->link_sameVirtualRegisterGPR.prev = predecessor;
#ifndef PUBLIC_RELEASE
	// overwrite the stale links with an invalid marker value (presumably to make accidental use after removal easy to spot)
	subrange->link_sameVirtualRegisterGPR.prev = (raLivenessSubrange_t*)1;
	subrange->link_sameVirtualRegisterGPR.next = (raLivenessSubrange_t*)1;
#endif
}
// Removes subrange from the doubly linked list that holds all subranges of a segment
// root points to the list head and is updated if subrange was the first entry
void PPCRecRARange_removeLink_allSubrangesGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange)
{
	raLivenessSubrange_t* const predecessor = subrange->link_segmentSubrangesGPR.prev;
	raLivenessSubrange_t* const successor = subrange->link_segmentSubrangesGPR.next;
	// bypass subrange in forward direction (or move the head if there is no predecessor)
	if (predecessor)
		predecessor->link_segmentSubrangesGPR.next = successor;
	else
		(*root) = successor;
	// bypass subrange in backward direction
	if (successor)
		successor->link_segmentSubrangesGPR.prev = predecessor;
#ifndef PUBLIC_RELEASE
	// overwrite the stale links with an invalid marker value (presumably to make accidental use after removal easy to spot)
	subrange->link_segmentSubrangesGPR.prev = (raLivenessSubrange_t*)1;
	subrange->link_segmentSubrangesGPR.next = (raLivenessSubrange_t*)1;
#endif
}
// Object pools for liveness ranges and subranges. Objects are obtained with acquireObj() and handed back with releaseObj().
// The create functions reset the members of an acquired object explicitly (including resize(0) on its list) instead of relying on a fresh state.
// NOTE(review): the constructor argument 4096 is presumably the number of objects per allocation block - confirm in MemoryPool.h
MemoryPoolPermanentObjects<raLivenessRange_t> memPool_livenessRange(4096);
MemoryPoolPermanentObjects<raLivenessSubrange_t> memPool_livenessSubrange(4096);
// Creates a liveness range without subranges for the given virtual register / name and registers it in the context
// The range starts out with no physical register assigned (-1)
raLivenessRange_t* PPCRecRA_createRangeBase(ppcImlGenContext_t* ppcImlGenContext, uint32 virtualRegister, uint32 name)
{
	raLivenessRange_t* const newRange = memPool_livenessRange.acquireObj();
	// the object comes from a pool, so every member is set explicitly
	newRange->physicalRegister = -1;
	newRange->name = name;
	newRange->virtualRegister = virtualRegister;
	newRange->list_subranges.resize(0);
	// make the range known to the register allocator
	ppcImlGenContext->raInfo.list_ranges.push_back(newRange);
	return newRange;
}
// Creates a subrange of range which covers the instruction indices startIndex to endIndex within imlSegment
// The subrange is appended to the range and linked into both subrange lists of the segment
raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, PPCRecImlSegment_t* imlSegment, sint32 startIndex, sint32 endIndex)
{
	raLivenessSubrange_t* const newSubrange = memPool_livenessSubrange.acquireObj();
	// the object comes from a pool, so every member is set explicitly
	newSubrange->list_locations.resize(0);
	newSubrange->imlSegment = imlSegment;
	newSubrange->range = range;
	// start and end are tracked as segment points
	PPCRecompilerIml_setSegmentPoint(&newSubrange->start, imlSegment, startIndex);
	PPCRecompilerIml_setSegmentPoint(&newSubrange->end, imlSegment, endIndex);
	// initial state: no stores, no load suppression, not connected to subranges of other segments
	newSubrange->_noLoad = false;
	newSubrange->hasStore = false;
	newSubrange->hasStoreDelayed = false;
	newSubrange->lastIterationIndex = 0;
	newSubrange->subrangeBranchTaken = nullptr;
	newSubrange->subrangeBranchNotTaken = nullptr;
	// register with the owning range
	range->list_subranges.push_back(newSubrange);
	// register with the segment (list per virtual register and list of all subranges)
	auto& segmentRaInfo = imlSegment->raInfo;
	PPCRecRARange_addLink_perVirtualGPR(&(segmentRaInfo.linkedList_perVirtualGPR[range->virtualRegister]), newSubrange);
	PPCRecRARange_addLink_allSubrangesGPR(&segmentRaInfo.linkedList_allSubranges, newSubrange);
	return newSubrange;
}
// Removes subrange from both subrange lists of its segment (list per virtual register and list of all subranges)
void _unlinkSubrange(raLivenessSubrange_t* subrange)
{
	auto& segmentRaInfo = subrange->imlSegment->raInfo;
	PPCRecRARange_removeLink_perVirtualGPR(&segmentRaInfo.linkedList_perVirtualGPR[subrange->range->virtualRegister], subrange);
	PPCRecRARange_removeLink_allSubrangesGPR(&segmentRaInfo.linkedList_allSubranges, subrange);
}
// Deletes a single subrange: unlinks it from its segment, removes it from its parent range and returns it to the pool
void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange)
{
	_unlinkSubrange(subrange);
	// remove from the parent range
	// the lookup result is checked because erasing the end iterator is undefined behavior
	auto& parentSubranges = subrange->range->list_subranges;
	auto itr = std::find(parentSubranges.begin(), parentSubranges.end(), subrange);
	if (itr != parentSubranges.end())
		parentSubranges.erase(itr);
	subrange->list_locations.clear();
	PPCRecompilerIml_removeSegmentPoint(&subrange->start);
	PPCRecompilerIml_removeSegmentPoint(&subrange->end);
	memPool_livenessSubrange.releaseObj(subrange);
}
// Deletes a subrange without touching the subrange list of its parent range
// Used when the parent range is deleted as a whole, the subrange is still unlinked from its segment
void _PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange)
{
	// detach from the segment lists
	_unlinkSubrange(subrange);
	// drop the segment points for start and end
	PPCRecompilerIml_removeSegmentPoint(&subrange->start);
	PPCRecompilerIml_removeSegmentPoint(&subrange->end);
	// hand the object back to the pool
	memPool_livenessSubrange.releaseObj(subrange);
}
// Deletes a range together with all of its subranges and removes it from the range list of the context
void PPCRecRA_deleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range)
{
	for (auto& subrange : range->list_subranges)
	{
		_PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext, subrange);
	}
	// remove from the context
	// the lookup result is checked because erasing the end iterator is undefined behavior
	auto& allRanges = ppcImlGenContext->raInfo.list_ranges;
	auto itr = std::find(allRanges.begin(), allRanges.end(), range);
	if (itr != allRanges.end())
		allRanges.erase(itr);
	memPool_livenessRange.releaseObj(range);
}
// Deletes a range together with all of its subranges but leaves the range list of the context untouched
// The caller is responsible for removing the range from that list
void PPCRecRA_deleteRangeNoUnlink(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range)
{
	auto& ownedSubranges = range->list_subranges;
	for (auto itr = ownedSubranges.begin(); itr != ownedSubranges.end(); ++itr)
		_PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext, *itr);
	memPool_livenessRange.releaseObj(range);
}
// Deletes every range (including subranges) known to the context and empties the range list
void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext)
{
	auto& allRanges = ppcImlGenContext->raInfo.list_ranges;
	// the list is cleared in one go afterwards, so the ranges do not remove themselves from it
	for (auto itr = allRanges.begin(); itr != allRanges.end(); ++itr)
		PPCRecRA_deleteRangeNoUnlink(ppcImlGenContext, *itr);
	allRanges.clear();
}
// Merges absorbedRange into range: all subranges change their owner to range, afterwards absorbedRange is deleted
// Both ranges have to be distinct and refer to the same virtual register
void PPCRecRA_mergeRanges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, raLivenessRange_t* absorbedRange)
{
	cemu_assert_debug(range != absorbedRange);
	cemu_assert_debug(range->virtualRegister == absorbedRange->virtualRegister);
	// hand every subrange over to the surviving range
	auto& absorbedSubranges = absorbedRange->list_subranges;
	for (auto* movedSubrange : absorbedSubranges)
	{
		movedSubrange->range = range;
		range->list_subranges.push_back(movedSubrange);
	}
	// empty the list first so that deleting the absorbed range does not delete the moved subranges
	absorbedSubranges.clear();
	PPCRecRA_deleteRange(ppcImlGenContext, absorbedRange);
}
// Appends absorbedSubrange to subrange and deletes absorbedSubrange afterwards.
// subrange takes over the absorbed subrange's successor links, usage locations and end index.
// Debug builds verify that both subranges share a segment, that subrange does not end after
// absorbedSubrange starts, that subrange has no successor links yet and that the two differ.
void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, raLivenessSubrange_t* absorbedSubrange)
{
#ifndef PUBLIC_RELEASE
	PPCRecRA_debugValidateSubrange(subrange);
	PPCRecRA_debugValidateSubrange(absorbedSubrange);
	// both subranges must belong to the same segment
	if (subrange->imlSegment != absorbedSubrange->imlSegment)
		assert_dbg();
	// the head must not end after the absorbed subrange starts
	if (subrange->end.index > absorbedSubrange->start.index)
		assert_dbg();
	// the head must not already continue into other segments
	if (subrange->subrangeBranchTaken || subrange->subrangeBranchNotTaken)
		assert_dbg();
	// merging a subrange with itself is invalid
	if (subrange == absorbedSubrange)
		assert_dbg();
#endif
	// inherit the outgoing links of the absorbed subrange
	subrange->subrangeBranchTaken = absorbedSubrange->subrangeBranchTaken;
	subrange->subrangeBranchNotTaken = absorbedSubrange->subrangeBranchNotTaken;
	// append the absorbed usage locations
	for (auto& absorbedLocation : absorbedSubrange->list_locations)
		subrange->list_locations.push_back(absorbedLocation);
	absorbedSubrange->list_locations.clear();
	// the merged subrange now reaches to where the absorbed one ended
	subrange->end.index = absorbedSubrange->end.index;
	PPCRecRA_debugValidateSubrange(subrange);
	PPCRecRA_deleteSubrange(ppcImlGenContext, absorbedSubrange);
}
// Removes all inter-segment connections from the range by splitting it into independent local ranges.
// Every subrange that has at least one usage location gets its own new range with a single subrange
// spanning from the first to just past the last location; subranges without locations are dropped.
// The original range is deleted at the end.
void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range)
{
	if (range->list_subranges.size() == 1)
		assert_dbg();
	for (auto& oldSubrange : range->list_subranges)
	{
		if (!oldSubrange->list_locations.empty())
		{
			// the replacement subrange covers exactly the used interval [first use, last use + 1)
			const auto firstUseIndex = oldSubrange->list_locations.data()[0].index;
			const auto lastUseIndex = oldSubrange->list_locations.data()[oldSubrange->list_locations.size() - 1].index;
			raLivenessRange_t* localRange = PPCRecRA_createRangeBase(ppcImlGenContext, range->virtualRegister, range->name);
			raLivenessSubrange_t* localSubrange = PPCRecRA_createSubrange(ppcImlGenContext, localRange, oldSubrange->imlSegment, firstUseIndex, lastUseIndex + 1);
			// carry over the usage locations
			for (auto& usedLocation : oldSubrange->list_locations)
				localSubrange->list_locations.push_back(usedLocation);
		}
	}
	// the exploded range is no longer needed
	PPCRecRA_deleteRange(ppcImlGenContext, range);
}
#ifndef PUBLIC_RELEASE
// Debug-only consistency check: if the subrange links to a successor subrange for the taken or
// not-taken branch, that successor must live in the corresponding successor segment.
void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange)
{
	auto* takenSuccessor = subrange->subrangeBranchTaken;
	if (takenSuccessor)
	{
		if (takenSuccessor->imlSegment != subrange->imlSegment->nextSegmentBranchTaken)
			assert_dbg();
	}
	auto* notTakenSuccessor = subrange->subrangeBranchNotTaken;
	if (notTakenSuccessor)
	{
		if (notTakenSuccessor->imlSegment != subrange->imlSegment->nextSegmentBranchNotTaken)
			assert_dbg();
	}
}
#else
// Validation is compiled out when PUBLIC_RELEASE is defined.
void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange)
{
}
#endif
// Splits a subrange at the given instruction index.
// After the split there are two ranges/subranges:
//   head -> the original subrange, which keeps only the locations before splitIndex
//   tail -> a newly created range+subrange reaching from splitIndex to the end of the original subrange
// A physical register assigned to head is not carried over to tail.
// Returns the tail subrange.
// If trimToHole is true, the end of head and the start of tail are moved inward to fit their locations.
// Ranges that begin at RA_INTER_RANGE_START are allowed and can be split.
raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, sint32 splitIndex, bool trimToHole)
{
#ifndef PUBLIC_RELEASE
	// the subrange must have a real (local) end, and splitIndex must lie strictly inside it
	if (subrange->end.index == RA_INTER_RANGE_END || subrange->end.index == RA_INTER_RANGE_START)
		assert_dbg();
	if (subrange->start.index >= splitIndex)
		assert_dbg();
	if (subrange->end.index <= splitIndex)
		assert_dbg();
#endif
	// the tail gets its own range so it can be allocated independently of the head
	raLivenessRange_t* tailRange = PPCRecRA_createRangeBase(ppcImlGenContext, subrange->range->virtualRegister, subrange->range->name);
	raLivenessSubrange_t* tailSubrange = PPCRecRA_createSubrange(ppcImlGenContext, tailRange, subrange->imlSegment, splitIndex, subrange->end.index);
	// every location at or after the split point is copied to the tail
	for (auto& headLocation : subrange->list_locations)
	{
		if (headLocation.index < splitIndex)
			continue;
		tailSubrange->list_locations.push_back(headLocation);
	}
	// truncate the head at the first location that now belongs to the tail
	const sint32 headLocationCount = static_cast<sint32>(subrange->list_locations.size());
	for (sint32 locIdx = 0; locIdx < headLocationCount; locIdx++)
	{
		if (subrange->list_locations.data()[locIdx].index < splitIndex)
			continue;
		subrange->list_locations.resize(locIdx);
		break;
	}
	if (!trimToHole)
		return tailSubrange;
	// shrink the head so it ends right after its last use (or one past its start if it has no uses left)
	subrange->end.index = subrange->list_locations.empty()
		? subrange->start.index + 1
		: subrange->list_locations.back().index + 1;
	// move the tail start forward to its first use
	if (tailSubrange->list_locations.empty())
		assert_dbg(); // should not happen? (In this case we can just avoid generating a tail at all)
	else
		tailSubrange->start.index = tailSubrange->list_locations.front().index;
	return tailSubrange;
}
// Records a register access at instruction `index` in the subrange's location list.
// If the most recent location already refers to the same index, its read/write flags are OR-ed with
// the new ones; otherwise a new location is appended. Indices are expected in non-decreasing order.
void PPCRecRA_updateOrAddSubrangeLocation(raLivenessSubrange_t* subrange, sint32 index, bool isRead, bool isWrite)
{
	if (!subrange->list_locations.empty())
	{
		auto& newestLocation = subrange->list_locations.back();
		cemu_assert_debug(newestLocation.index <= index);
		if (newestLocation.index == index)
		{
			// same instruction accessed again -> merge the access flags
			newestLocation.isRead = newestLocation.isRead || isRead;
			newestLocation.isWrite = newestLocation.isWrite || isWrite;
			return;
		}
	}
	// first location, or a later instruction than the newest one
	subrange->list_locations.emplace_back(index, isRead, isWrite);
}
// Estimated cost of a single register load or store inside the given segment.
// Grows quadratically with loop nesting: ((loopDepth + 1) * 5)^2 -> 25, 100, 225, 400, ...
sint32 PPCRecRARange_getReadWriteCost(PPCRecImlSegment_t* imlSegment)
{
	sint32 nestingLevel = imlSegment->loopDepth + 1;
	const sint32 scaledLevel = nestingLevel * 5;
	return scaledLevel * scaledLevel;
}
// Estimates the cost of an entire range.
// Ignores data flow and does not detect avoidable reads/stores.
// todo - this algorithm isn't accurate. If we have 10 parallel branches with a load each then the actual
//        cost is still only that of one branch (plus minimal extra cost for generating more code).
// Currently the cost is the most expensive entry point plus the most expensive exit point, plus a small
// term ((entries + exits) / 10) for the number of entry/exit points.
sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range)
{
	sint32 worstEntryCost = 0;
	sint32 worstExitCost = 0;
	sint32 entryExitCount = 0;
	for (auto& subrange : range->list_subranges)
	{
		// a subrange that starts inside its segment counts as an entry point (read)
		const bool startsInsideSegment = (subrange->start.index != RA_INTER_RANGE_START);
		// a subrange that ends inside its segment counts as an exit point (write)
		const bool endsInsideSegment = (subrange->end.index != RA_INTER_RANGE_END);
		if (!startsInsideSegment && !endsInsideSegment)
			continue;
		const sint32 segmentAccessCost = PPCRecRARange_getReadWriteCost(subrange->imlSegment);
		if (startsInsideSegment)
		{
			worstEntryCost = std::max(worstEntryCost, segmentAccessCost);
			entryExitCount++;
		}
		if (endsInsideSegment)
		{
			worstExitCost = std::max(worstExitCost, segmentAccessCost);
			entryExitCount++;
		}
	}
	return worstEntryCost + worstExitCost + entryExitCount / 10;
}
// Estimates how much the cost of the range would change if PPCRecRA_explodeRange() were called on it.
// Result = (sum over all subranges with locations of one read + one store) - current range cost.
sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t* range)
{
	sint32 explodedCost = 0;
	for (auto& subrange : range->list_subranges)
	{
		// subranges without any usage disappear when exploding and therefore cost nothing
		if (!subrange->list_locations.empty())
			explodedCost += PPCRecRARange_getReadWriteCost(subrange->imlSegment) * 2; // we assume a read and a store
	}
	return explodedCost - PPCRecRARange_estimateCost(range);
}
// Estimates the extra cost caused by splitting the subrange at splitIndex.
// Returns 0 if the split point does not fall between two usage locations (or if the subrange
// has no locations at all, which is flagged via assert_dbg), otherwise the cost of one read
// plus one store in the subrange's segment.
sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessSubrange_t* subrange, sint32 splitIndex)
{
#ifndef PUBLIC_RELEASE
	// only subranges with a local end are expected here
	if (subrange->end.index == RA_INTER_RANGE_END)
		assert_dbg();
#endif
	if (subrange->list_locations.empty())
	{
		assert_dbg(); // should not happen?
		return 0;
	}
	// a split before/at the first use or after the last use does not separate any locations
	const bool splitsBeforeFirstUse = (splitIndex <= subrange->list_locations.front().index);
	if (splitsBeforeFirstUse)
		return 0;
	const bool splitsAfterLastUse = (splitIndex > subrange->list_locations.back().index);
	if (splitsAfterLastUse)
		return 0;
	// todo - determine exact cost of split subranges (e.g. by inspecting the locations at/after splitIndex)
	// currently we assume that the additional region will require a read and a store
	return PPCRecRARange_getReadWriteCost(subrange->imlSegment) * 2;
}

View file

@ -0,0 +1,27 @@
#pragma once
// Register allocator liveness range API.
// A range (raLivenessRange_t) belongs to one virtual register and owns a list of subranges;
// each subrange (raLivenessSubrange_t) covers an index interval inside a single IML segment.

// creation (definitions are not part of this header; semantics presumably: allocate and register a new range / subrange)
raLivenessRange_t* PPCRecRA_createRangeBase(ppcImlGenContext_t* ppcImlGenContext, uint32 virtualRegister, uint32 name);
raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, PPCRecImlSegment_t* imlSegment, sint32 startIndex, sint32 endIndex);
// deletion - subranges/ranges are returned to their memory pools
void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange);
void PPCRecRA_deleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range);
void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext);
// moves all subranges of absorbedRange into range, then deletes absorbedRange
void PPCRecRA_mergeRanges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, raLivenessRange_t* absorbedRange);
// replaces range by one independent local range per subrange that has usage locations
void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range);
// appends absorbedSubrange to subrange (same segment), then deletes absorbedSubrange
void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, raLivenessSubrange_t* absorbedSubrange);
// splits subrange at splitIndex; returns the newly created tail subrange (which gets its own range)
raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, sint32 splitIndex, bool trimToHole = false);
// records a read and/or write of the register at instruction index (indices must be passed in ascending order)
void PPCRecRA_updateOrAddSubrangeLocation(raLivenessSubrange_t* subrange, sint32 index, bool isRead, bool isWrite);
// debug consistency check of the subrange's successor links; empty when PUBLIC_RELEASE is defined
void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange);
// cost estimation
sint32 PPCRecRARange_getReadWriteCost(PPCRecImlSegment_t* imlSegment);
sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range);
sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t* range);
sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessSubrange_t* subrange, sint32 splitIndex);
// special values to mark the index of ranges that reach across the segment border
// (START: the range is already live when the segment is entered, END: the range stays live past the end of the segment)
#define RA_INTER_RANGE_START (-1)
#define RA_INTER_RANGE_END (0x70000000)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,414 @@
#include "PPCRecompiler.h"
#include "PPCRecompilerIml.h"
#include "PPCRecompilerX64.h"
#include "PPCRecompilerImlRanges.h"
#include <queue>
// Returns true if the virtual register has a usage interval recorded in this segment.
// An unused register is marked by usageStart == INT_MAX (see PPCRecRA_calculateSegmentMinMaxRanges).
bool _isRangeDefined(PPCRecImlSegment_t* imlSegment, sint32 vGPR)
{
	const auto& usageInterval = imlSegment->raDistances.reg[vGPR];
	return usageInterval.usageStart != INT_MAX;
}
// Computes, for every virtual GPR, the interval [usageStart, usageEnd) of instruction indices
// within the segment in which the register is accessed. Registers that are not accessed keep
// usageStart == INT_MAX / usageEnd == INT_MIN. Scanning stops at the first suffix instruction.
void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
	// start with "unused" for all registers
	for (sint32 vGPR = 0; vGPR < PPC_REC_MAX_VIRTUAL_GPR; vGPR++)
	{
		auto& usageInterval = imlSegment->raDistances.reg[vGPR];
		usageInterval.usageStart = INT_MAX;
		usageInterval.usageEnd = INT_MIN;
	}
	// walk the instructions and widen the interval of every accessed register
	PPCImlOptimizerUsedRegisters_t gprTracking;
	for (sint32 index = 0; index < imlSegment->imlListCount; index++)
	{
		auto* instruction = imlSegment->imlList + index;
		// suffix instructions (and anything after them) are not part of the usage scan
		if (PPCRecompiler_isSuffixInstruction(instruction))
			break;
		PPCRecompiler_checkRegisterUsage(NULL, instruction, &gprTracking);
		for (sint32 slot = 0; slot < 4; slot++)
		{
			const sint32 virtualRegister = gprTracking.gpr[slot];
			if (virtualRegister >= 0)
			{
				cemu_assert_debug(virtualRegister < PPC_REC_MAX_VIRTUAL_GPR);
				auto& usageInterval = imlSegment->raDistances.reg[virtualRegister];
				usageInterval.usageStart = std::min(usageInterval.usageStart, index); // index before/at instruction
				usageInterval.usageEnd = std::max(usageInterval.usageEnd, index + 1); // index after instruction
			}
		}
	}
}
// For each register, calculates the min/max index of its usage interval within every segment.
void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext)
{
	for (sint32 segIdx = 0; segIdx < ppcImlGenContext->segmentListCount; segIdx++)
	{
		PPCRecImlSegment_t* segment = ppcImlGenContext->segmentList[segIdx];
		PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext, segment);
	}
}
// Creates the subrange of `range` for register vGPR in imlSegment and recursively does the same for
// all connected segments (successors if the usage reaches the segment end, predecessors if it
// starts at the segment beginning), linking successor subranges via subrangeBranchTaken/NotTaken.
// Each (segment, vGPR) pair is processed at most once, guarded by raDistances.isProcessed.
// Returns the subrange for this segment, the already existing one if the segment was processed
// before, or nullptr if the register is not used in this segment.
raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR, raLivenessRange_t* range)
{
if (imlSegment->raDistances.isProcessed[vGPR])
{
// return already existing segment
return imlSegment->raInfo.linkedList_perVirtualGPR[vGPR];
}
// mark as processed before recursing so that cycles in the control flow terminate
imlSegment->raDistances.isProcessed[vGPR] = true;
if (_isRangeDefined(imlSegment, vGPR) == false)
return nullptr;
// create subrange
cemu_assert_debug(imlSegment->raInfo.linkedList_perVirtualGPR[vGPR] == nullptr);
raLivenessSubrange_t* subrange = PPCRecRA_createSubrange(ppcImlGenContext, range, imlSegment, imlSegment->raDistances.reg[vGPR].usageStart, imlSegment->raDistances.reg[vGPR].usageEnd);
// traverse forward
if (imlSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END)
{
// only successors whose usage begins at the segment start are part of the same range
if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START)
{
subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, vGPR, range);
cemu_assert_debug(subrange->subrangeBranchTaken->start.index == RA_INTER_RANGE_START);
}
if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START)
{
subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, vGPR, range);
cemu_assert_debug(subrange->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START);
}
}
// traverse backward
if (imlSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START)
{
// the return value is not needed here; the predecessor links to this subrange during its own forward traversal
for (auto& it : imlSegment->list_prevSegments)
{
if (it->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END)
PPCRecRA_convertToMappedRanges(ppcImlGenContext, it, vGPR, range);
}
}
// return subrange
return subrange;
}
// Creates the liveness ranges that start in this segment and fills in the usage locations of all
// subranges of the segment.
//  1) For every used, not yet processed virtual GPR a new range is created and expanded across
//     all connected segments via PPCRecRA_convertToMappedRanges().
//  2) The segment's instructions are scanned and each register access is added as a location
//     to the matching subrange.
void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
	for (sint32 vGPR = 0; vGPR < PPC_REC_MAX_VIRTUAL_GPR; vGPR++)
	{
		// skip registers that are unused here or already covered by a range started elsewhere
		const bool needsNewRange = _isRangeDefined(imlSegment, vGPR) && !imlSegment->raDistances.isProcessed[vGPR];
		if (!needsNewRange)
			continue;
		raLivenessRange_t* newRange = PPCRecRA_createRangeBase(ppcImlGenContext, vGPR, ppcImlGenContext->mappedRegister[vGPR]);
		PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment, vGPR, newRange);
	}
	// lookup table: virtual GPR -> subrange of that register within this segment
	raLivenessSubrange_t* vGPR2Subrange[PPC_REC_MAX_VIRTUAL_GPR];
	for (sint32 vGPR = 0; vGPR < PPC_REC_MAX_VIRTUAL_GPR; vGPR++)
	{
		vGPR2Subrange[vGPR] = imlSegment->raInfo.linkedList_perVirtualGPR[vGPR];
#ifndef PUBLIC_RELEASE
		// at this stage there must be at most one subrange per register and segment
		if (vGPR2Subrange[vGPR] && vGPR2Subrange[vGPR]->link_sameVirtualRegisterGPR.next != nullptr)
			assert_dbg();
#endif
	}
	// convert register accesses of each instruction into locations
	PPCImlOptimizerUsedRegisters_t gprTracking;
	for (sint32 index = 0; index < imlSegment->imlListCount; index++)
	{
		auto* instruction = imlSegment->imlList + index;
		// stop at the suffix instruction
		if (PPCRecompiler_isSuffixInstruction(instruction))
			break;
		PPCRecompiler_checkRegisterUsage(NULL, instruction, &gprTracking);
		for (sint32 slot = 0; slot < 4; slot++)
		{
			const sint32 virtualRegister = gprTracking.gpr[slot];
			if (virtualRegister < 0)
				continue;
			// slot 3 is treated as the written register, all other slots as reads
			const bool isWrite = (slot == 3);
			raLivenessSubrange_t* accessedSubrange = vGPR2Subrange[virtualRegister];
			PPCRecRA_updateOrAddSubrangeLocation(accessedSubrange, index, isWrite == false, isWrite);
#ifndef PUBLIC_RELEASE
			// the access must lie inside the subrange's interval
			if (index < vGPR2Subrange[virtualRegister]->start.index)
				assert_dbg();
			if (index + 1 > vGPR2Subrange[virtualRegister]->end.index)
				assert_dbg();
#endif
		}
	}
}
// Extends the usage interval of vGPR so that it reaches past the end of the segment.
// If the register was not used in the segment at all, both start and end are set to
// RA_INTER_RANGE_END.
void PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR)
{
	const bool wasDefined = _isRangeDefined(imlSegment, vGPR);
	auto& usageInterval = imlSegment->raDistances.reg[vGPR];
	if (!wasDefined)
		usageInterval.usageStart = RA_INTER_RANGE_END;
	usageInterval.usageEnd = RA_INTER_RANGE_END;
}
// Extends the usage interval of vGPR so that it begins at the start of the segment
// (RA_INTER_RANGE_START). If the register was not used in the segment, the end is set to
// RA_INTER_RANGE_START as well. Afterwards the interval of every predecessor segment is
// extended to its end so the value stays live across the segment border.
void PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR)
{
	const bool wasDefined = _isRangeDefined(imlSegment, vGPR);
	auto& usageInterval = imlSegment->raDistances.reg[vGPR];
	if (!wasDefined)
		usageInterval.usageEnd = RA_INTER_RANGE_START;
	usageInterval.usageStart = RA_INTER_RANGE_START;
	// propagate backwards
	for (auto& predecessor : imlSegment->list_prevSegments)
		PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, predecessor, vGPR);
}
// Connects the usage of vGPR along a route of segments (route[0] .. route[routeDepth-1]):
// the first segment is extended to its end, the last segment to its beginning, and every
// segment in between in both directions. A route needs at least two segments.
void _PPCRecRA_connectRanges(ppcImlGenContext_t* ppcImlGenContext, sint32 vGPR, PPCRecImlSegment_t** route, sint32 routeDepth)
{
#ifndef PUBLIC_RELEASE
	if (routeDepth < 2)
		assert_dbg();
#endif
	const sint32 lastRouteIndex = routeDepth - 1;
	// the starting segment only needs to reach its end
	PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[0], vGPR);
	// pass-through segments are live from beginning to end
	for (sint32 hop = 1; hop < lastRouteIndex; hop++)
	{
		PPCRecImlSegment_t* passThroughSegment = route[hop];
		PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, passThroughSegment, vGPR);
		PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, passThroughSegment, vGPR);
	}
	// the final segment only needs to reach its beginning
	PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[lastRouteIndex], vGPR);
}
// Recursive helper of PPCRecRA_checkAndTryExtendRange().
// Follows the control flow from currentSegment looking for the next usage of vGPR within
// distanceLeft instructions. route[0..routeDepth-1] holds the segments visited so far; the route
// is limited to 64 entries. When a close enough usage is found, all segments on the route are
// connected via _PPCRecRA_connectRanges().
void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR, sint32 distanceLeft, PPCRecImlSegment_t** route, sint32 routeDepth)
{
	if (routeDepth >= 64)
	{
		forceLogDebug_printf("Recompiler RA route maximum depth exceeded for function 0x%08x\n", ppcImlGenContext->functionRef->ppcAddress);
		return;
	}
	route[routeDepth] = currentSegment;
	const sint32 usageStart = currentSegment->raDistances.reg[vGPR].usageStart;
	if (usageStart == INT_MAX)
	{
		// register unused in this segment -> the whole segment counts towards the distance
		const sint32 distanceAfterSegment = distanceLeft - currentSegment->imlListCount;
		if (distanceAfterSegment <= 0)
			return;
		// keep searching along both outgoing edges
		if (currentSegment->nextSegmentBranchNotTaken)
			_PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, distanceAfterSegment, route, routeDepth + 1);
		if (currentSegment->nextSegmentBranchTaken)
			_PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, distanceAfterSegment, route, routeDepth + 1);
		return;
	}
	// register is used in this segment -> measure distance to the usage
	bool isOutOfReach;
	if (usageStart == RA_INTER_RANGE_END)
		isOutOfReach = (distanceLeft < currentSegment->imlListCount); // range too far away
	else
		isOutOfReach = (usageStart != RA_INTER_RANGE_START && usageStart > distanceLeft); // out of range
	if (isOutOfReach)
		return;
	// found close range -> connect ranges
	_PPCRecRA_connectRanges(ppcImlGenContext, vGPR, route, routeDepth + 1);
}
// Tries to connect the usage of vGPR in currentSegment with a nearby usage in a following segment.
// The search budget is 45 instructions, reduced by the number of instructions between the last
// usage and the end of currentSegment.
void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR)
{
	const sint32 usageEnd = currentSegment->raDistances.reg[vGPR].usageEnd;
#ifndef PUBLIC_RELEASE
	if (usageEnd < 0)
		assert_dbg();
#endif
	if (usageEnd == RA_INTER_RANGE_START)
		assert_dbg();
	// count instructions to end of initial segment
	const sint32 instructionsUntilEndOfSeg = (usageEnd == RA_INTER_RANGE_END) ? 0 : (currentSegment->imlListCount - usageEnd);
#ifndef PUBLIC_RELEASE
	if (instructionsUntilEndOfSeg < 0)
		assert_dbg();
#endif
	const sint32 remainingScanDist = 45 - instructionsUntilEndOfSeg;
	if (remainingScanDist <= 0)
		return; // can't reach end
	// also don't forget: extending is easier if we allow 'non-symmetric' branches. E.g. register range one enters one branch
	PPCRecImlSegment_t* route[64];
	route[0] = currentSegment;
	if (currentSegment->nextSegmentBranchNotTaken)
		_PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, remainingScanDist, route, 1);
	if (currentSegment->nextSegmentBranchTaken)
		_PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, remainingScanDist, route, 1);
}
// For every virtual GPR used in the segment, tries to extend its usage towards a nearby usage
// in following segments. Debug builds additionally check two segment invariants afterwards.
void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
	// todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries
	for (sint32 vGPR = 0; vGPR < PPC_REC_MAX_VIRTUAL_GPR; vGPR++)
	{
		const bool isUsed = (imlSegment->raDistances.reg[vGPR].usageStart != INT_MAX);
		if (isUsed)
			PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, imlSegment, vGPR); // check and extend if possible
	}
#ifndef PUBLIC_RELEASE
	// an enterable segment must not have predecessors
	if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable)
		assert_dbg();
	// a segment with known successors must not be flagged as having an uncertain successor
	const bool hasKnownSuccessor = (imlSegment->nextSegmentBranchNotTaken != nullptr || imlSegment->nextSegmentBranchTaken != nullptr);
	if (hasKnownSuccessor && imlSegment->nextSegmentIsUncertain)
		assert_dbg();
#endif
}
// Breadth-first walk over all segments reachable from imlSegment. Each segment is visited once
// (tracked via raRangeExtendProcessed) and handed to PPCRecRA_mergeCloseRangesForSegmentV2().
void PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
	std::vector<PPCRecImlSegment_t*> worklist;
	worklist.reserve(1000);
	// marks a successor as visited and queues it, unless it is missing or was already queued
	auto enqueueIfUnvisited = [&worklist](PPCRecImlSegment_t* successor)
	{
		if (successor && successor->raRangeExtendProcessed == false)
		{
			successor->raRangeExtendProcessed = true;
			worklist.push_back(successor);
		}
	};
	imlSegment->raRangeExtendProcessed = true;
	worklist.push_back(imlSegment);
	// the worklist grows while it is being processed, so iterate by position
	for (size_t cursor = 0; cursor < worklist.size(); cursor++)
	{
		PPCRecImlSegment_t* currentSegment = worklist[cursor];
		PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext, currentSegment);
		// follow flow
		enqueueIfUnvisited(currentSegment->nextSegmentBranchNotTaken);
		enqueueIfUnvisited(currentSegment->nextSegmentBranchTaken);
	}
}
// Starts a flow walk (PPCRecRA_followFlowAndExtendRanges) from every segment that has no
// predecessor, i.e. from every entry point of the segment graph.
void PPCRecRA_mergeCloseRangesV2(ppcImlGenContext_t* ppcImlGenContext)
{
	for (sint32 segIdx = 0; segIdx < ppcImlGenContext->segmentListCount; segIdx++)
	{
		PPCRecImlSegment_t* candidate = ppcImlGenContext->segmentList[segIdx];
		if (!candidate->list_prevSegments.empty())
			continue;
		// a root segment cannot have been reached by an earlier walk
		if (candidate->raRangeExtendProcessed)
			assert_dbg(); // should not happen
		PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext, candidate);
	}
}
// For every segment inside a loop that has at least one successor with a lower loop depth
// (a loop exit), extends all register usages that reach the end of the segment into the
// beginning of all of its successors. This allows the data flow analyzer to move stores
// out of the loop.
void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext)
{
	for (sint32 segIdx = 0; segIdx < ppcImlGenContext->segmentListCount; segIdx++)
	{
		PPCRecImlSegment_t* loopSegment = ppcImlGenContext->segmentList[segIdx];
		const auto ownLoopDepth = loopSegment->loopDepth;
		if (ownLoopDepth <= 0)
			continue; // not inside a loop
		PPCRecImlSegment_t* const takenSuccessor = loopSegment->nextSegmentBranchTaken;
		PPCRecImlSegment_t* const notTakenSuccessor = loopSegment->nextSegmentBranchNotTaken;
		// look for loop exit
		const bool hasLoopExit =
			(takenSuccessor && takenSuccessor->loopDepth < ownLoopDepth) ||
			(notTakenSuccessor && notTakenSuccessor->loopDepth < ownLoopDepth);
		if (!hasLoopExit)
			continue;
		// extend looping ranges into all exits
		// todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries
		for (sint32 vGPR = 0; vGPR < PPC_REC_MAX_VIRTUAL_GPR; vGPR++)
		{
			if (loopSegment->raDistances.reg[vGPR].usageEnd != RA_INTER_RANGE_END)
				continue; // range not set or does not reach end of segment
			if (takenSuccessor)
				PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, takenSuccessor, vGPR);
			if (notTakenSuccessor)
				PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, notTakenSuccessor, vGPR);
		}
	}
}
// Runs the flow-based passes on the per-segment usage intervals and then builds the actual
// liveness ranges/subranges for every segment.
void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext)
{
	// pass 1: connect usages that are close to each other across segments
	PPCRecRA_mergeCloseRangesV2(ppcImlGenContext);
	// pass 2: extend ranges past loop exits so that register stores can move out of loops
	PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext);
	// pass 3: create ranges, subranges and locations
	for (sint32 segIdx = 0; segIdx < ppcImlGenContext->segmentListCount; segIdx++)
		PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext, ppcImlGenContext->segmentList[segIdx]);
}
// Derives the per-subrange load/store flags from its usage locations:
//   hasStore - set if any location writes the register
//   _noLoad  - set if a write happens before any read has been seen, or if the subrange
//              starts at RA_INTER_RANGE_START
void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange)
{
	bool sawRead = false;
	bool sawWrite = false;
	bool writtenBeforeRead = false;
	for (auto& location : subrange->list_locations)
	{
		// the read flag is evaluated first, so a read+write location counts as a read before the write
		sawRead = sawRead || location.isRead;
		if (!location.isWrite)
			continue;
		sawWrite = true;
		if (!sawRead)
			writtenBeforeRead = true;
	}
	subrange->hasStore = sawWrite;
	const bool startsAtSegmentBorder = (subrange->start.index == RA_INTER_RANGE_START);
	subrange->_noLoad = startsAtSegmentBorder ? true : writtenBeforeRead;
}
void _analyzeRangeDataFlow(raLivenessSubrange_t* subrange);

// Data flow analysis over all ranges.
// This function is called after _assignRegisters(), which means that all ranges are already final
// and won't change anymore.
void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext)
{
	auto& allRanges = ppcImlGenContext->raInfo.list_ranges;
	// first pass: per-subrange flags, independent of neighbouring subranges
	for (auto& currentRange : allRanges)
	{
		for (auto& currentSubrange : currentRange->list_subranges)
			PPCRecRA_analyzeSubrangeDataDependencyV2(currentSubrange);
	}
	// second pass: scan along subrange flow
	// todo - traversing this backwards should be faster and yield better results due to the nature of the algorithm
	for (auto& currentRange : allRanges)
	{
		for (auto& currentSubrange : currentRange->list_subranges)
			_analyzeRangeDataFlow(currentSubrange);
	}
}

View file

@ -0,0 +1,173 @@
#include "PPCRecompiler.h"
#include "PPCRecompilerIml.h"
// Returns the first segment that is flagged as a jump destination for the given PPC address,
// or NULL (after printing a debug message) if no such segment exists.
PPCRecImlSegment_t* PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcOffset)
{
	for (sint32 segIdx = 0; segIdx < ppcImlGenContext->segmentListCount; segIdx++)
	{
		PPCRecImlSegment_t* candidate = ppcImlGenContext->segmentList[segIdx];
		if (!candidate->isJumpDestination)
			continue;
		if (candidate->jumpDestinationPPCAddress == ppcOffset)
			return candidate;
	}
	debug_printf("PPCRecompiler_getSegmentByPPCJumpAddress(): Unable to find segment (ppcOffset 0x%08x)\n", ppcOffset);
	return NULL;
}
// Links imlSegmentDst as the branch-not-taken (fall-through) successor of imlSegmentSrc and
// registers imlSegmentSrc as a predecessor of imlSegmentDst. Does nothing if the link already
// exists; asserts if a different not-taken successor is already set.
void PPCRecompilerIml_setLinkBranchNotTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst)
{
	PPCRecImlSegment_t* existingSuccessor = imlSegmentSrc->nextSegmentBranchNotTaken;
	// make sure segments aren't already linked
	if (existingSuccessor == imlSegmentDst)
		return;
	// the slot is expected to be free
	if (existingSuccessor != NULL)
		assert_dbg();
	// forward link, then backward link
	imlSegmentSrc->nextSegmentBranchNotTaken = imlSegmentDst;
	imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc);
}
// Links imlSegmentDst as the branch-taken successor of imlSegmentSrc and registers imlSegmentSrc
// as a predecessor of imlSegmentDst. Does nothing if the link already exists; asserts if a
// different taken successor is already set.
void PPCRecompilerIml_setLinkBranchTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst)
{
	PPCRecImlSegment_t* existingSuccessor = imlSegmentSrc->nextSegmentBranchTaken;
	// make sure segments aren't already linked
	if (existingSuccessor == imlSegmentDst)
		return;
	// the slot is expected to be free
	if (existingSuccessor != NULL)
		assert_dbg();
	// forward link, then backward link
	imlSegmentSrc->nextSegmentBranchTaken = imlSegmentDst;
	imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc);
}
// Removes the link from imlSegmentSrc to imlSegmentDst: clears the matching successor slot of the
// source (not-taken is checked first, then taken) and removes one occurrence of the source from the
// destination's predecessor list. Asserts if either side of the link cannot be found.
void PPCRecompilerIML_removeLink(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst)
{
	// forward direction
	if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst)
		imlSegmentSrc->nextSegmentBranchNotTaken = NULL;
	else if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst)
		imlSegmentSrc->nextSegmentBranchTaken = NULL;
	else
		assert_dbg();
	// backward direction: drop the first matching predecessor entry
	auto& predecessors = imlSegmentDst->list_prevSegments;
	bool matchFound = false;
	for (auto predItr = predecessors.begin(); predItr != predecessors.end(); ++predItr)
	{
		if (*predItr != imlSegmentSrc)
			continue;
		predecessors.erase(predItr);
		matchFound = true;
		break;
	}
	if (!matchFound)
		assert_dbg();
}
/*
 * Replaces all links to segment orig with links to segment new.
 * Every predecessor of imlSegmentOrig is re-pointed to imlSegmentNew, keeping the kind of
 * the link (branch taken / branch not taken).
 */
void PPCRecompilerIML_relinkInputSegment(PPCRecImlSegment_t* imlSegmentOrig, PPCRecImlSegment_t* imlSegmentNew)
{
	// removing a link also shrinks list_prevSegments, so always look at the first entry
	while (!imlSegmentOrig->list_prevSegments.empty())
	{
		PPCRecImlSegment_t* predecessor = imlSegmentOrig->list_prevSegments[0];
		const bool viaNotTaken = (predecessor->nextSegmentBranchNotTaken == imlSegmentOrig);
		const bool viaTaken = !viaNotTaken && (predecessor->nextSegmentBranchTaken == imlSegmentOrig);
		if (viaNotTaken)
		{
			PPCRecompilerIML_removeLink(predecessor, imlSegmentOrig);
			PPCRecompilerIml_setLinkBranchNotTaken(predecessor, imlSegmentNew);
		}
		else if (viaTaken)
		{
			PPCRecompilerIML_removeLink(predecessor, imlSegmentOrig);
			PPCRecompilerIml_setLinkBranchTaken(predecessor, imlSegmentNew);
		}
		else
		{
			// predecessor list and successor pointers are out of sync
			assert_dbg();
		}
	}
}
// Builds the control flow links between segments based on the last instruction of each segment:
//  - empty segment: falls through to the next segment (or is marked uncertain if it is the last one)
//  - (conditional) jump: linked to the jump destination; conditional jumps also fall through
//  - macro: successor is treated as unknown
//  - anything else: falls through to the next segment
void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext)
{
	for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
	{
		PPCRecImlSegment_t* segment = ppcImlGenContext->segmentList[s];
		const bool isFinalSegment = (s + 1) >= ppcImlGenContext->segmentListCount;
		PPCRecImlSegment_t* fallthroughSegment = isFinalSegment ? NULL : ppcImlGenContext->segmentList[s + 1];
		// handle empty segment
		if (segment->imlListCount == 0)
		{
			if (isFinalSegment)
				segment->nextSegmentIsUncertain = true;
			else
				PPCRecompilerIml_setLinkBranchNotTaken(segment, ppcImlGenContext->segmentList[s + 1]); // continue execution to next segment
			continue;
		}
		// check last instruction of segment
		PPCRecImlInstruction_t* lastInstruction = segment->imlList + (segment->imlListCount - 1);
		const bool endsWithJump = (lastInstruction->type == PPCREC_IML_TYPE_CJUMP || lastInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK);
		if (endsWithJump)
		{
			// find destination segment by ppc jump address
			PPCRecImlSegment_t* jumpDestSegment = PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, lastInstruction->op_conditionalJump.jumpmarkAddress);
			if (!jumpDestSegment)
			{
				segment->nextSegmentIsUncertain = true;
				continue;
			}
			// only conditional jumps can fall through
			if (lastInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE)
				PPCRecompilerIml_setLinkBranchNotTaken(segment, fallthroughSegment);
			PPCRecompilerIml_setLinkBranchTaken(segment, jumpDestSegment);
		}
		else if (lastInstruction->type == PPCREC_IML_TYPE_MACRO)
		{
			// currently we assume that the next segment is unknown for all macros
			segment->nextSegmentIsUncertain = true;
		}
		else
		{
			// all other instruction types do not branch
			PPCRecompilerIml_setLinkBranchNotTaken(segment, fallthroughSegment);
		}
	}
}
// Makes sure that enterable segments have no predecessors.
// For every segment that is enterable but also reachable from another segment, a new entry
// segment is inserted at the end of the segment list. The entry segment takes over the enterable
// flag and enter address, contains a single jump instruction and is linked (branch taken) to the
// original segment, which then loses its enterable flag.
void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext)
{
	// The loop bound is deliberately the live segmentListCount: segments inserted below are also
	// visited. (The previously unused snapshot of the initial count has been removed.)
	// NOTE(review): freshly inserted entry segments presumably start with an empty predecessor
	// list and are therefore skipped - confirm against PPCRecompilerIml_insertSegments().
	for (sint32 i = 0; i < ppcImlGenContext->segmentListCount; i++)
	{
		PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[i];
		if (imlSegment->list_prevSegments.empty() || !imlSegment->isEnterable)
			continue;
		// spawn new segment at end
		PPCRecompilerIml_insertSegments(ppcImlGenContext, ppcImlGenContext->segmentListCount, 1);
		PPCRecImlSegment_t* entrySegment = ppcImlGenContext->segmentList[ppcImlGenContext->segmentListCount - 1];
		entrySegment->isEnterable = true;
		entrySegment->enterPPCAddress = imlSegment->enterPPCAddress;
		// create jump instruction
		PPCRecompiler_pushBackIMLInstructions(entrySegment, 0, 1);
		PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, entrySegment->imlList + 0);
		PPCRecompilerIml_setLinkBranchTaken(entrySegment, imlSegment);
		// remove enterable flag from original segment
		imlSegment->isEnterable = false;
		imlSegment->enterPPCAddress = 0;
	}
}
// Returns a pointer to the last instruction of the segment, or nullptr if the segment is empty.
PPCRecImlInstruction_t* PPCRecompilerIML_getLastInstruction(PPCRecImlSegment_t* imlSegment)
{
	const auto instructionCount = imlSegment->imlListCount;
	return (instructionCount == 0) ? nullptr : imlSegment->imlList + (instructionCount - 1);
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,332 @@
// One entry of the relocation table kept in x64GenContext_t (relocateOffsetTable).
// NOTE(review): the field meanings below are inferred from their names - confirm against the code that fills the table.
typedef struct
{
uint32 offset; // presumably the position inside the generated code that needs patching
uint8 type; // kind of relocation
void* extraInfo; // relocation specific payload
}x64RelocEntry_t;
// State of the x64 code generator: output buffer, tracked condition register state and relocation table.
typedef struct
{
// output buffer (NOTE(review): index/size presumably are the current write position and the capacity - confirm)
uint8* codeBuffer;
sint32 codeBufferIndex;
sint32 codeBufferSize;
// cr state
sint32 activeCRRegister; // current x86 condition flags reflect this cr* register
sint32 activeCRState; // describes the way in which x86 flags map to the cr register (signed / unsigned)
// relocate offsets
x64RelocEntry_t* relocateOffsetTable;
sint32 relocateOffsetTableSize; // presumably the allocated capacity of the table - confirm
sint32 relocateOffsetTableCount; // presumably the number of used entries - confirm
}x64GenContext_t;
// x86-64 register numbering used by the x64 code generator.
// Some of these are defined by winnt.h and gnu headers
// -> undefine them first so that the definitions below always take effect
#undef REG_EAX
#undef REG_ECX
#undef REG_EDX
#undef REG_EBX
#undef REG_ESP
#undef REG_EBP
#undef REG_ESI
#undef REG_EDI
#undef REG_NONE
#undef REG_RAX
#undef REG_RCX
#undef REG_RDX
#undef REG_RBX
#undef REG_RSP
#undef REG_RBP
#undef REG_RSI
#undef REG_RDI
#undef REG_R8
#undef REG_R9
#undef REG_R10
#undef REG_R11
#undef REG_R12
#undef REG_R13
#undef REG_R14
#undef REG_R15
// 32-bit register names (same numbers as the 64-bit registers below)
#define REG_EAX 0
#define REG_ECX 1
#define REG_EDX 2
#define REG_EBX 3
#define REG_ESP 4 // reserved for low half of hCPU pointer
#define REG_EBP 5
#define REG_ESI 6
#define REG_EDI 7
#define REG_NONE -1
// 64-bit register names
#define REG_RAX 0
#define REG_RCX 1
#define REG_RDX 2
#define REG_RBX 3
#define REG_RSP 4 // reserved for hCPU pointer
#define REG_RBP 5
#define REG_RSI 6
#define REG_RDI 7
#define REG_R8 8
#define REG_R9 9
#define REG_R10 10
#define REG_R11 11
#define REG_R12 12
#define REG_R13 13 // reserved to hold pointer to memory base? (Not decided yet)
#define REG_R14 14 // reserved as temporary register
#define REG_R15 15 // reserved for pointer to ppcRecompilerInstanceData
// 8-bit register names
#define REG_AL 0
#define REG_CL 1
#define REG_DL 2
#define REG_BL 3
#define REG_AH 4
#define REG_CH 5
#define REG_DH 6
#define REG_BH 7
// reserved registers
#define REG_RESV_TEMP (REG_R14)
#define REG_RESV_HCPU (REG_RSP)
#define REG_RESV_MEMBASE (REG_R13)
#define REG_RESV_RECDATA (REG_R15)
// reserved floating-point registers
#define REG_RESV_FPR_TEMP (15)
// lookup table used by tempToRealRegister() to translate a temporary register index (0-11) into a REG_* number
extern sint32 x64Gen_registerMap[12];
#define tempToRealRegister(__x) (x64Gen_registerMap[__x])
// floating-point and 16-bit register indices are passed through unchanged
#define tempToRealFPRRegister(__x) (__x)
#define reg32ToReg16(__x) (__x)
// Condition selectors for the conditional emitters declared in this header (the conditionType parameters).
// The enumerators are numbered in declaration order starting at 0 (X86_CONDITION_EQUAL == 0).
// NOTE(review): these values are therefore not the raw x86 condition-code nibbles; presumably the emitters translate them -- confirm in the jmpc/setcc/cmovcc implementations.
enum
{
X86_CONDITION_EQUAL, // or zero
X86_CONDITION_NOT_EQUAL, // or not zero
X86_CONDITION_SIGNED_LESS, // or not greater/equal
X86_CONDITION_SIGNED_GREATER, // or not less/equal
X86_CONDITION_SIGNED_LESS_EQUAL, // or not greater
X86_CONDITION_SIGNED_GREATER_EQUAL, // or not less
X86_CONDITION_UNSIGNED_BELOW, // or not above/equal
X86_CONDITION_UNSIGNED_ABOVE, // or not below/equal
X86_CONDITION_UNSIGNED_BELOW_EQUAL, // or not above
X86_CONDITION_UNSIGNED_ABOVE_EQUAL, // or not below
X86_CONDITION_CARRY, // carry flag must be set
X86_CONDITION_NOT_CARRY, // carry flag must not be set
X86_CONDITION_SIGN, // sign flag must be set
X86_CONDITION_NOT_SIGN, // sign flag must not be set
X86_CONDITION_PARITY, // parity flag must be set
X86_CONDITION_NONE, // no condition, jump always
};
#define PPCREC_CR_TEMPORARY (8) // never stored
// Values for x64GenContext_t::activeCRState.
// NOTE(review): the comments on the first two values used to contradict the macro names (UNSIGNED was described as "signed" and vice versa). They now follow the names; the instruction examples were left as found -- confirm against the code that sets activeCRState.
#define PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC (0) // for unsigned arithmetic operations (ADD, CMPI)
#define PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC (1) // for signed arithmetic operations (ADD, CMPI)
#define PPCREC_CR_STATE_TYPE_LOGICAL (2) // for unsigned operations (CMPLI)
// Relocation kinds stored in x64RelocEntry_t::type (presumably -- confirm against the relocation pass)
#define X86_RELOC_MAKE_RELATIVE (0) // make code imm relative to instruction
#define X64_RELOC_LINK_TO_PPC (1) // translate from ppc address to x86 offset
#define X64_RELOC_LINK_TO_SEGMENT (2) // link to beginning of segment
// Number of host registers left after subtracting the reserved ones (the REG_RESV_* macros list four GPRs and one FPR)
#define PPC_X64_GPR_USABLE_REGISTERS (16-4)
#define PPC_X64_FPR_USABLE_REGISTERS (16-1) // Use XMM0 - XMM14, XMM15 is the temp register
bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext);
void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset);
void PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions();
void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed);
bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed);
void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
// ASM gen
void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v);
void x64Gen_writeU16(x64GenContext_t* x64GenContext, uint32 v);
void x64Gen_writeU32(x64GenContext_t* x64GenContext, uint32 v);
void x64Emit_mov_reg32_mem32(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memOffset);
void x64Emit_mov_mem32_reg32(x64GenContext_t* x64GenContext, sint32 memBaseReg64, sint32 memOffset, sint32 srcReg);
void x64Emit_mov_mem64_reg64(x64GenContext_t* x64GenContext, sint32 memBaseReg64, sint32 memOffset, sint32 srcReg);
void x64Emit_mov_reg64_mem64(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memOffset);
void x64Emit_mov_reg64_mem32(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memOffset);
void x64Emit_mov_mem32_reg64(x64GenContext_t* x64GenContext, sint32 memBaseReg64, sint32 memOffset, sint32 srcReg);
void x64Emit_mov_reg64_mem64(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memIndexReg64, sint32 memOffset);
void x64Emit_mov_reg32_mem32(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memIndexReg64, sint32 memOffset);
void x64Emit_mov_reg64b_mem8(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memIndexReg64, sint32 memOffset);
void x64Emit_movZX_reg32_mem8(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memIndexReg64, sint32 memOffset);
void x64Emit_movZX_reg64_mem8(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memOffset);
void x64Gen_movSignExtend_reg64Low32_mem8Reg64PlusReg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
void x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
void x64Gen_mov_mem64Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
void x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
void x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
void x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
void x64Gen_mov_mem32Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 memImmU32, uint32 dataImmU32);
void x64Gen_mov_mem64Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 memImmU32, uint32 dataImmU32);
void x64Gen_mov_mem8Reg64_imm8(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 memImmU32, uint8 dataImmU8);
void x64Gen_mov_reg64_imm64(x64GenContext_t* x64GenContext, sint32 destRegister, uint64 immU64);
void x64Gen_mov_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 destRegister, uint64 immU32);
void x64Gen_mov_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_lea_reg64Low32_reg64Low32PlusReg64Low32(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64);
void x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, uint32 conditionType, sint32 destRegister, sint32 srcRegister);
void x64Gen_mov_reg64_reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_xchg_reg64_reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_movZeroExtend_reg64Low32_reg64Low16(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_movZeroExtend_reg64Low32_reg64Low8(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_or_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32);
void x64Gen_and_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32);
void x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32);
void x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
void x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegister64, sint32 memImmS32, sint32 srcRegister);
void x64Gen_add_reg64_reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_add_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_add_reg64_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_add_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_sub_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_sub_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_sub_reg64_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_sub_mem32reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegister, sint32 memImmS32, uint64 immU32);
void x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_adc_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_adc_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_dec_mem32(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint32 memoryImmU32);
void x64Gen_imul_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 operandRegister);
void x64Gen_idiv_reg64Low32(x64GenContext_t* x64GenContext, sint32 operandRegister);
void x64Gen_div_reg64Low32(x64GenContext_t* x64GenContext, sint32 operandRegister);
void x64Gen_imul_reg64Low32(x64GenContext_t* x64GenContext, sint32 operandRegister);
void x64Gen_mul_reg64Low32(x64GenContext_t* x64GenContext, sint32 operandRegister);
void x64Gen_and_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_and_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_test_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_test_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_cmp_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, sint32 immS32);
void x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 memRegister, sint32 memImmS32);
void x64Gen_or_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_or_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_xor_reg32_reg32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_xor_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_xor_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_rol_reg64Low32_imm8(x64GenContext_t* x64GenContext, sint32 srcRegister, sint8 immS8);
void x64Gen_rol_reg64Low32_cl(x64GenContext_t* x64GenContext, sint32 srcRegister);
void x64Gen_rol_reg64Low16_imm8(x64GenContext_t* x64GenContext, sint32 srcRegister, sint8 immS8);
void x64Gen_rol_reg64_imm8(x64GenContext_t* x64GenContext, sint32 srcRegister, sint8 immS8);
void x64Gen_shl_reg64Low32_imm8(x64GenContext_t* x64GenContext, sint32 srcRegister, sint8 immS8);
void x64Gen_shr_reg64Low32_imm8(x64GenContext_t* x64GenContext, sint32 srcRegister, sint8 immS8);
void x64Gen_sar_reg64Low32_imm8(x64GenContext_t* x64GenContext, sint32 srcRegister, sint8 immS8);
void x64Gen_not_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister);
void x64Gen_neg_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister);
void x64Gen_cdq(x64GenContext_t* x64GenContext);
void x64Gen_bswap_reg64(x64GenContext_t* x64GenContext, sint32 destRegister);
void x64Gen_bswap_reg64Lower32bit(x64GenContext_t* x64GenContext, sint32 destRegister);
void x64Gen_bswap_reg64Lower16bit(x64GenContext_t* x64GenContext, sint32 destRegister);
void x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_cmp_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, sint32 immS32);
void x64Gen_setcc_mem8(x64GenContext_t* x64GenContext, sint32 conditionType, sint32 memoryRegister, uint32 memoryImmU32);
void x64Gen_setcc_reg64b(x64GenContext_t* x64GenContext, sint32 conditionType, sint32 dataRegister);
void x64Gen_bt_mem8(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint32 memoryImmU32, uint8 bitIndex);
void x64Gen_cmc(x64GenContext_t* x64GenContext);
void x64Gen_jmp_imm32(x64GenContext_t* x64GenContext, uint32 destImm32);
void x64Gen_jmp_memReg64(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 immU32);
void x64Gen_jmpc_far(x64GenContext_t* x64GenContext, sint32 conditionType, sint32 relativeDest);
void x64Gen_jmpc_near(x64GenContext_t* x64GenContext, sint32 conditionType, sint32 relativeDest);
void x64Gen_push_reg64(x64GenContext_t* x64GenContext, sint32 srcRegister);
void x64Gen_pop_reg64(x64GenContext_t* x64GenContext, sint32 destRegister);
void x64Gen_jmp_reg64(x64GenContext_t* x64GenContext, sint32 srcRegister);
void x64Gen_call_reg64(x64GenContext_t* x64GenContext, sint32 srcRegister);
void x64Gen_ret(x64GenContext_t* x64GenContext);
void x64Gen_int3(x64GenContext_t* x64GenContext);
// floating-point (SIMD/SSE) gen
void x64Gen_movaps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSource);
void x64Gen_movupd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_movupd_memReg128_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_movddup_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_movhlps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_movsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc, uint8 imm8);
void x64Gen_addsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_addpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_subsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_subpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_mulsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_mulpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_mulpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_divsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_divpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_comisd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 memoryReg, sint32 memImmS32);
void x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_comiss_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 memoryReg, sint32 memImmS32);
void x64Gen_orps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32);
void x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32);
void x64Gen_andps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32);
void x64Gen_andpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_andps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32);
void x64Gen_cvttpd2dq_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc);
void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_cvtpi2pd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 memReg, sint32 memImmS32);
void x64Gen_cvtsd2si_reg64Low_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc);
void x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc);
void x64Gen_sqrtsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_sqrtpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_rcpss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_mulss_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_movd_xmmReg_reg64Low32(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc);
void x64Gen_movd_reg64Low32_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc);
void x64Gen_movq_xmmReg_reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc);
void x64Gen_movq_reg64_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 xmmRegisterSrc);
// AVX
void x64Gen_avx_VPUNPCKHQDQ_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB);
void x64Gen_avx_VUNPCKHPD_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB);
void x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB);
// BMI
void x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
void x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
void x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);

View file

@ -0,0 +1,49 @@
#include "PPCRecompiler.h"
#include "PPCRecompilerX64.h"
void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
// Emits a VEX prefix followed by the opcode byte.
//   opcodeMap         - accepted but not used; the three-byte form always writes map value 1 in its low bits
//   additionalOperand - register number placed (bit-inverted, 4 bits) into bits 3..6 of the last prefix byte
//   pp                - low two bits of the last prefix byte (see the VEX_PP_* values)
//   vex_ext           - written verbatim into bit 7 of the last prefix byte
//   vex_r             - only used by the three-byte form: non-zero clears bit 7 of the middle byte
//   vex_b             - non-zero selects the three-byte form (0xC4), zero the two-byte form (0xC5)
//   opcode            - opcode byte written after the prefix
// The caller is responsible for writing the ModRM byte afterwards.
void _x64Gen_vex128_nds(x64GenContext_t* x64GenContext, uint8 opcodeMap, uint8 additionalOperand, uint8 pp, uint8 vex_ext, uint8 vex_r, uint8 vex_b, uint8 opcode)
{
	const bool useThreeByteForm = (vex_b != 0);
	if (useThreeByteForm)
	{
		x64Gen_writeU8(x64GenContext, 0xC4); // three byte VEX
		// bit 6 is always set (no extended index register), bit 5 stays clear because vex_b is non-zero here
		uint8 middleByte = 0x40 | 1;
		if (!vex_r)
			middleByte |= 0x80;
		x64Gen_writeU8(x64GenContext, middleByte);
	}
	else
	{
		x64Gen_writeU8(x64GenContext, 0xC5); // two byte VEX
	}
	const int topBit = vex_ext << 7;
	const int invertedOperandBits = ((~additionalOperand) & 0xF) << 3;
	x64Gen_writeU8(x64GenContext, topBit | invertedOperandBits | pp);
	x64Gen_writeU8(x64GenContext, opcode);
}
// Values for the 2-bit "pp" field that _x64Gen_vex128_nds places in the low bits of the last VEX prefix byte.
// NOTE(review): the Intel SDM VEX.pp table lists 00 = no prefix, 01 = 66, 10 = F3, 11 = F2, which agrees with all four values below -- once confirmed the "guessed" markers can be dropped.
#define VEX_PP_0F 0 // guessed
#define VEX_PP_66_0F 1
#define VEX_PP_F3_0F 2 // guessed
#define VEX_PP_F2_0F 3 // guessed
// Emits VPUNPCKHQDQ dst, srcA, srcB (128-bit, register operands only): VEX prefix, opcode 0x6D, register-direct ModRM.
// srcRegisterA travels in the VEX prefix, srcRegisterB in ModRM.rm and dstRegister in ModRM.reg.
void x64Gen_avx_VPUNPCKHQDQ_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB)
{
	const bool dstIsHigh = dstRegister >= 8;
	const bool srcBIsHigh = srcRegisterB >= 8;
	const uint8 vexExt = dstIsHigh ? 0 : 1; // bit 7 of the last prefix byte: set for registers 0-7
	const uint8 vexR = (dstIsHigh && srcBIsHigh) ? 1 : 0; // only consulted by the three-byte form
	const uint8 vexB = srcBIsHigh ? 1 : 0; // a high srcB forces the three-byte form
	_x64Gen_vex128_nds(x64GenContext, 0, srcRegisterA, VEX_PP_66_0F, vexExt, vexR, vexB, 0x6D);
	const sint32 modRM = 0xC0 + (srcRegisterB & 7) + (dstRegister & 7) * 8;
	x64Gen_writeU8(x64GenContext, modRM);
}
// Emits VUNPCKHPD dst, srcA, srcB (128-bit, register operands only): VEX prefix, opcode 0x15, register-direct ModRM.
// srcRegisterA travels in the VEX prefix, srcRegisterB in ModRM.rm and dstRegister in ModRM.reg.
void x64Gen_avx_VUNPCKHPD_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB)
{
	const bool dstIsHigh = dstRegister >= 8;
	const bool srcBIsHigh = srcRegisterB >= 8;
	const uint8 vexExt = dstIsHigh ? 0 : 1; // bit 7 of the last prefix byte: set for registers 0-7
	const uint8 vexR = (dstIsHigh && srcBIsHigh) ? 1 : 0; // only consulted by the three-byte form
	const uint8 vexB = srcBIsHigh ? 1 : 0; // a high srcB forces the three-byte form
	_x64Gen_vex128_nds(x64GenContext, 0, srcRegisterA, VEX_PP_66_0F, vexExt, vexR, vexB, 0x15);
	const sint32 modRM = 0xC0 + (srcRegisterB & 7) + (dstRegister & 7) * 8;
	x64Gen_writeU8(x64GenContext, modRM);
}
// Emits VSUBPD dst, srcA, srcB (128-bit, register operands only): VEX prefix, opcode 0x5C, register-direct ModRM.
// srcRegisterA travels in the VEX prefix, srcRegisterB in ModRM.rm and dstRegister in ModRM.reg.
void x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB)
{
	const bool dstIsHigh = dstRegister >= 8;
	const bool srcBIsHigh = srcRegisterB >= 8;
	const uint8 vexExt = dstIsHigh ? 0 : 1; // bit 7 of the last prefix byte: set for registers 0-7
	const uint8 vexR = (dstIsHigh && srcBIsHigh) ? 1 : 0; // only consulted by the three-byte form
	const uint8 vexB = srcBIsHigh ? 1 : 0; // a high srcB forces the three-byte form
	_x64Gen_vex128_nds(x64GenContext, 0, srcRegisterA, VEX_PP_66_0F, vexExt, vexR, vexB, 0x5C);
	const sint32 modRM = 0xC0 + (srcRegisterB & 7) + (dstRegister & 7) * 8;
	x64Gen_writeU8(x64GenContext, modRM);
}

View file

@ -0,0 +1,80 @@
#include "PPCRecompiler.h"
#include "PPCRecompilerX64.h"
void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
// Emits MOVBE <dstReg64> (low dword), DWORD [<memRegisterA64> + <memRegisterB64> + <memImmS32>]
// Byte sequence: optional REX prefix, 0F 38 F0, then the ModRM/SIB/displacement bytes written by _x64Gen_writeMODRMDeprecated.
//   dstRegister    - destination register (0-15)
//   memRegisterA64 - first address register; selects the 0x01 prefix bit when >= 8
//   memRegisterB64 - second address register; selects the 0x02 prefix bit when >= 8
//   memImmS32      - displacement forwarded to the ModRM writer
void x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32)
{
	// Build the prefix from independent extension bits instead of enumerating every combination.
	// The previous if/else chain wrote 0x42 for (dstRegister >= 8 && memRegisterB64 >= 8 && memRegisterA64 < 8),
	// which dropped the 0x04 bit and therefore encoded the wrong destination register; the correct value is 0x46.
	// All other combinations produce the same byte as before.
	uint8 rexPrefix = 0x40;
	if (dstRegister >= 8)
		rexPrefix |= 0x04; // extends the ModRM.reg field (data register)
	if (memRegisterB64 >= 8)
		rexPrefix |= 0x02; // extends memRegisterB64
	if (memRegisterA64 >= 8)
		rexPrefix |= 0x01; // extends memRegisterA64
	if (rexPrefix != 0x40)
		x64Gen_writeU8(x64GenContext, rexPrefix); // the prefix is omitted when no register needs extending
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x38);
	x64Gen_writeU8(x64GenContext, 0xF0);
	_x64Gen_writeMODRMDeprecated(x64GenContext, dstRegister, memRegisterA64, memRegisterB64, memImmS32);
}
// 16-bit variant of the MOVBE load: writes the 0x66 prefix and then delegates to the 32-bit encoder,
// which appends its optional prefix byte, the 0F 38 F0 opcode and the ModRM bytes.
void x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32)
{
// MOVBE <dstReg64> (low word), WORD [<reg64> + <reg64> + <imm64>]
// note: Unlike the 32bit version this instruction does not set the upper 32bits of the 64bit register to 0
// NOTE(review): given the note above, the "ZeroExtend" in the function name looks misleading -- callers presumably clear the upper bits themselves; confirm.
x64Gen_writeU8(x64GenContext, 0x66); // 16bit prefix
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, dstRegister, memRegisterA64, memRegisterB64, memImmS32);
}
// Emits MOVBE DWORD [<memRegisterA64> + <memRegisterB64> + <memImmS32>], <srcReg64> (low dword)
// Byte sequence: optional REX prefix, 0F 38 F1, then the ModRM/SIB/displacement bytes written by _x64Gen_writeMODRMDeprecated.
//   memRegisterA64 - first address register; selects the 0x01 prefix bit when >= 8
//   memRegisterB64 - second address register; selects the 0x02 prefix bit when >= 8
//   memImmS32      - displacement forwarded to the ModRM writer
//   srcRegister    - register whose low dword is stored (0-15)
void x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister)
{
	// Build the prefix from independent extension bits instead of enumerating every combination.
	// The previous if/else chain wrote 0x42 for (srcRegister >= 8 && memRegisterB64 >= 8 && memRegisterA64 < 8),
	// which dropped the 0x04 bit and therefore encoded the wrong source register; the correct value is 0x46.
	// All other combinations produce the same byte as before.
	uint8 rexPrefix = 0x40;
	if (srcRegister >= 8)
		rexPrefix |= 0x04; // extends the ModRM.reg field (data register)
	if (memRegisterB64 >= 8)
		rexPrefix |= 0x02; // extends memRegisterB64
	if (memRegisterA64 >= 8)
		rexPrefix |= 0x01; // extends memRegisterA64
	if (rexPrefix != 0x40)
		x64Gen_writeU8(x64GenContext, rexPrefix); // the prefix is omitted when no register needs extending
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x38);
	x64Gen_writeU8(x64GenContext, 0xF1);
	_x64Gen_writeMODRMDeprecated(x64GenContext, srcRegister, memRegisterA64, memRegisterB64, memImmS32);
}
// Emits SHRX reg64, reg64, reg64 as a fixed five-byte sequence: C4, two further prefix bytes, opcode F7, register-direct ModRM.
//   registerDst - goes into ModRM.reg; values >= 8 clear bit 7 of the second byte
//   registerA   - goes into ModRM.rm; values >= 8 clear bit 5 of the second byte
//   registerB   - subtracted (times 8) from 0xFB to form the third byte
void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
{
	sint32 secondByte = 0xE2;
	if (registerDst >= 8)
		secondByte -= 0x80;
	if (registerA >= 8)
		secondByte -= 0x20;
	const sint32 thirdByte = 0xFB - registerB * 8;
	const sint32 modRM = 0xC0 + (registerDst & 7) * 8 + (registerA & 7);

	x64Gen_writeU8(x64GenContext, 0xC4);
	x64Gen_writeU8(x64GenContext, secondByte);
	x64Gen_writeU8(x64GenContext, thirdByte);
	x64Gen_writeU8(x64GenContext, 0xF7);
	x64Gen_writeU8(x64GenContext, modRM);
}
// Emits SHLX reg64, reg64, reg64 as a fixed five-byte sequence: C4, two further prefix bytes, opcode F7, register-direct ModRM.
//   registerDst - goes into ModRM.reg; values >= 8 clear bit 7 of the second byte
//   registerA   - goes into ModRM.rm; values >= 8 clear bit 5 of the second byte
//   registerB   - subtracted (times 8) from 0xF9 to form the third byte
void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
{
	sint32 secondByte = 0xE2;
	if (registerDst >= 8)
		secondByte -= 0x80;
	if (registerA >= 8)
		secondByte -= 0x20;
	const sint32 thirdByte = 0xF9 - registerB * 8;
	const sint32 modRM = 0xC0 + (registerDst & 7) * 8 + (registerA & 7);

	x64Gen_writeU8(x64GenContext, 0xC4);
	x64Gen_writeU8(x64GenContext, secondByte);
	x64Gen_writeU8(x64GenContext, thirdByte);
	x64Gen_writeU8(x64GenContext, 0xF7);
	x64Gen_writeU8(x64GenContext, modRM);
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,752 @@
#include "PPCRecompiler.h"
#include "PPCRecompilerIml.h"
#include "PPCRecompilerX64.h"
// Writes an optional one-byte prefix (0x40 plus extension bits) for an instruction with two register operands.
//   xmmRegister1 >= 8 -> bit 0x01
//   xmmRegister2 >= 8 -> bit 0x04
//   use64BitMode      -> bit 0x08
// Nothing is written when none of the bits would be set.
void x64Gen_genSSEVEXPrefix2(x64GenContext_t* x64GenContext, sint32 xmmRegister1, sint32 xmmRegister2, bool use64BitMode)
{
	uint8 prefix = 0x40;
	if (xmmRegister1 >= 8)
		prefix |= 0x01;
	if (xmmRegister2 >= 8)
		prefix |= 0x04;
	if (use64BitMode)
		prefix |= 0x08;
	// a bare 0x40 carries no information, so the byte is skipped entirely
	if (prefix == 0x40)
		return;
	x64Gen_writeU8(x64GenContext, prefix);
}
// Writes an optional one-byte prefix (0x40 plus extension bits) for an instruction with one xmm register operand.
//   use64BitMode     -> bit 0x01
//   xmmRegister >= 8 -> bit 0x04
// Nothing is written when none of the bits would be set.
// Note: unlike x64Gen_genSSEVEXPrefix2, use64BitMode maps to bit 0x01 here (not 0x08). The [R15+imm] path of
// x64Gen_movddup_xmmReg_memReg64 passes true and documents an expected 0x41 prefix, so callers rely on this value.
void x64Gen_genSSEVEXPrefix1(x64GenContext_t* x64GenContext, sint32 xmmRegister, bool use64BitMode)
{
	uint8 prefix = 0x40;
	if (use64BitMode)
		prefix |= 0x01;
	if (xmmRegister >= 8)
		prefix |= 0x04;
	// a bare 0x40 carries no information, so the byte is skipped entirely
	if (prefix == 0x40)
		return;
	x64Gen_writeU8(x64GenContext, prefix);
}
// SSE: MOVAPS <xmmRegisterDest>, <xmmRegisterSource> -- copies one xmm register into another.
// Byte sequence: optional prefix, 0F 28, register-direct ModRM (dest in the reg field, source in the rm field).
void x64Gen_movaps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSource)
{
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSource, xmmRegisterDest, false); // tested
	x64Gen_writeU8(x64GenContext, 0x0F);
	// opcode 0x28; 0x29 is the alternative encoding with source and destination exchanged
	x64Gen_writeU8(x64GenContext, 0x28);
	const sint32 modRM = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSource & 7);
	x64Gen_writeU8(x64GenContext, modRM);
}
// SSE2: MOVUPD <xmm>, [<reg>+<imm>] -- loads two doubles from memory into an xmm register.
// Only memRegister == REG_ESP is implemented. Every other value, including REG_NONE (absolute address),
// triggers assert_dbg() and emits nothing.
void x64Gen_movupd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	if (memRegister != REG_ESP)
	{
		// unimplemented addressing form (an absolute-address variant would use 66 0F 10 with ModRM 0x05+(xmm&7)*8 followed by imm32)
		assert_dbg();
		return;
	}
	// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
	// example encoding: 66 0F 10 84 E4 23 01 00 00
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegister, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x10);
	const sint32 modRM = 0x84 + (xmmRegister & 7) * 8;
	x64Gen_writeU8(x64GenContext, modRM);
	x64Gen_writeU8(x64GenContext, 0xE4);
	x64Gen_writeU32(x64GenContext, memImmU32);
}
// SSE2: MOVUPD [<reg>+<imm>], <xmm> -- stores the two doubles of an xmm register to memory
// (opcode 0F 11, the store counterpart of the 0F 10 load form).
// Only memRegister == REG_ESP is implemented. Every other value, including REG_NONE (absolute address),
// triggers assert_dbg() and emits nothing.
void x64Gen_movupd_memReg128_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	if (memRegister != REG_ESP)
	{
		// unimplemented addressing form (an absolute-address variant would use 66 0F 11 with ModRM 0x05+(xmm&7)*8 followed by imm32)
		assert_dbg();
		return;
	}
	// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegister, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x11);
	const sint32 modRM = 0x84 + (xmmRegister & 7) * 8;
	x64Gen_writeU8(x64GenContext, modRM);
	x64Gen_writeU8(x64GenContext, 0xE4);
	x64Gen_writeU32(x64GenContext, memImmU32);
}
// SSE3: MOVDDUP <xmm>, [<reg>+<imm>] -- loads one double from memory into both halves of an xmm register.
// Supported base registers are REG_RSP and REG_R15. Any other value, including REG_NONE (absolute address),
// triggers assert_dbg() and emits nothing.
void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	const sint32 regFieldBits = (xmmRegister & 7) * 8;
	switch (memRegister)
	{
	case REG_RSP:
		// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
		x64Gen_writeU8(x64GenContext, 0xF2);
		if (xmmRegister >= 8)
			x64Gen_writeU8(x64GenContext, 0x44); // prefix needed only for xmm8-xmm15
		x64Gen_writeU8(x64GenContext, 0x0F);
		x64Gen_writeU8(x64GenContext, 0x12);
		x64Gen_writeU8(x64GenContext, 0x84 + regFieldBits);
		x64Gen_writeU8(x64GenContext, 0xE4);
		x64Gen_writeU32(x64GenContext, memImmU32);
		break;
	case REG_R15:
		// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
		// example encoding: F2 41 0F 12 87 - 44 33 22 11
		x64Gen_writeU8(x64GenContext, 0xF2);
		x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegister, true); // true -> prefix bit 0x01, always emitted
		x64Gen_writeU8(x64GenContext, 0x0F);
		x64Gen_writeU8(x64GenContext, 0x12);
		x64Gen_writeU8(x64GenContext, 0x87 + regFieldBits);
		x64Gen_writeU32(x64GenContext, memImmU32);
		break;
	case REG_NONE:
		// MOVDDUP <xmm>, [<imm>] is not implemented (would be 36 F2 0F 12 with ModRM 0x05+(xmm&7)*8 followed by imm32)
	default:
		assert_dbg();
		break;
	}
}
void x64Gen_movddup_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE3: MOVDDUP <xmmDest>, <xmmSrc>   (F2 0F 12 /r)
	// Copies the low double of the source into both halves of the destination.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x12);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_movhlps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE1: MOVHLPS <xmmDest>, <xmmSrc>   (0F 12 /r, register form)
	// Copies the high 64 bits of the source into the low 64 bits of the destination;
	// the upper half of the destination is left untouched.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x12);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_movsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: MOVSD <xmmDest>, <xmmSrc>   (F2 0F 10 /r)
	// Copies the low double of the source into the low half of the destination,
	// the upper half of the destination stays as it is.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x10); // opcode 0x11 is the same move with reg/rm roles exchanged
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE2: MOVSD [RSP+<imm32>], <xmm>   (F2 0F 11 /r)
	// Stores the low 64 bits (one double) of the xmm register to memory.
	if (memRegister != REG_RSP)
	{
		// MOVSD [<imm>], <xmm> (REG_NONE) and all other base registers are not implemented
		assert_dbg();
		return;
	}
	// example encoding: F2 0F 11 84 24 - 33 22 11 00
	const int modrm = 0x84 + ((xmmRegister & 7) << 3); // mod=10 (disp32), rm=100 (SIB follows)
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, 0, xmmRegister, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x11);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU8(x64GenContext, 0x24); // SIB: no index, base = RSP
	x64Gen_writeU32(x64GenContext, memImmU32);
}
void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE2: MOVLPD <xmm>, [RSP+<imm32>]   (66 0F 12 /r)
	// Loads one double from memory into the low half of the xmm register,
	// the upper half is not modified.
	if (memRegister != REG_RSP)
	{
		// MOVLPD <xmm>, [<imm>] (REG_NONE) and all other base registers are not implemented
		assert_dbg();
		return;
	}
	// example encoding: 66 0F 12 84 24 - 33 22 11 00
	const int modrm = 0x84 + ((xmmRegister & 7) << 3); // mod=10 (disp32), rm=100 (SIB follows)
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, 0, xmmRegister, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x12);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU8(x64GenContext, 0x24); // SIB: no index, base = RSP
	x64Gen_writeU32(x64GenContext, memImmU32);
}
void x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: UNPCKLPD <xmmDest>, <xmmSrc>   (66 0F 14 /r)
	// Interleaves the low doubles of destination and source.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x14);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: UNPCKHPD <xmmDest>, <xmmSrc>   (66 0F 15 /r)
	// Interleaves the high doubles of destination and source.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x15);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc, uint8 imm8)
{
	// SSE2: SHUFPD <xmmDest>, <xmmSrc>, <imm8>   (66 0F C6 /r ib)
	// Builds the destination from doubles of destination and source as selected by imm8.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0xC6);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU8(x64GenContext, imm8); // shuffle selector
}
void x64Gen_addsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: ADDSD <xmmDest>, <xmmSrc>   (F2 0F 58 /r)
	// Adds the low doubles, the upper quadword of the destination is preserved.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false); // marked untested by the original author
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x58);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_addpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: ADDPD <xmmDest>, <xmmSrc>   (66 0F 58 /r)
	// Adds both packed doubles.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x58);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_subsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: SUBSD <xmmDest>, <xmmSrc>   (F2 0F 5C /r)
	// Subtracts the low doubles, the upper quadword of the destination is preserved.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5C);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_subpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: SUBPD <xmmDest>, <xmmSrc>   (66 0F 5C /r)
	// Subtracts both packed doubles.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false); // marked untested by the original author
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5C);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_mulsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: MULSD <xmmDest>, <xmmSrc>   (F2 0F 59 /r)
	// Multiplies the low doubles, the upper quadword of the destination is preserved.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x59);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_mulpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: MULPD <xmmDest>, <xmmSrc>   (66 0F 59 /r)
	// Multiplies both packed doubles.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false); // marked untested by the original author
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x59);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_mulpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE2: MULPD <xmm>, [R14+<imm32>]   (66 REX 0F 59 /r)
	// Only R14 is implemented as base register.
	if (memRegister != REG_R14)
	{
		// absolute addressing (REG_NONE) and all other base registers are unsupported
		assert_dbg();
		return;
	}
	// REX.B is always required for R14, REX.R is added for xmm8..xmm15
	const int rexPrefix = (xmmRegister >= 8) ? 0x45 : 0x41;
	const int modrm = 0x86 + ((xmmRegister & 7) << 3); // mod=10 (disp32), rm=110
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_writeU8(x64GenContext, rexPrefix);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x59);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU32(x64GenContext, memImmU32);
}
void x64Gen_divsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: DIVSD <xmmDest>, <xmmSrc>   (F2 0F 5E /r)
	// Divides the low doubles, the upper quadword of the destination is preserved.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5E);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_divpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: DIVPD <xmmDest>, <xmmSrc>   (66 0F 5E /r)
	// Divides both packed doubles.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5E);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_comisd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: COMISD <xmmDest>, <xmmSrc>   (66 0F 2F /r)
	// Compares the low doubles of both registers.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false); // marked untested by the original author
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2F);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 memoryReg, sint32 memImmS32)
{
	// SSE2: COMISD <xmm>, [R15+<imm32>]   (66 0F 2F /r)
	// Compares the low double of the register with a double in memory.
	if (memoryReg != REG_R15)
	{
		// only R15 is implemented as base register
		assert_dbg();
		return;
	}
	const int modrm = 0x87 + ((xmmRegisterDest & 7) << 3); // mod=10 (disp32), rm=111
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2F);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU32(x64GenContext, static_cast<uint32>(memImmS32));
}
void x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: UCOMISD <xmmDest>, <xmmSrc>   (66 0F 2E /r)
	// Unordered compare of the low doubles of both registers.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2E);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_comiss_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 memoryReg, sint32 memImmS32)
{
	// SSE1: COMISS <xmm>, [R15+<imm32>]   (0F 2F /r, no mandatory prefix)
	// Compares the low float of the register with a float in memory.
	if (memoryReg != REG_R15)
	{
		// only R15 is implemented as base register
		assert_dbg();
		return;
	}
	const int modrm = 0x87 + ((xmmRegisterDest & 7) << 3); // mod=10 (disp32), rm=111
	x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2F);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU32(x64GenContext, static_cast<uint32>(memImmS32));
}
void x64Gen_orps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32)
{
	// SSE1: ORPS <xmm>, [R15+<imm32>]   (0F 56 /r)
	// Bitwise OR of the xmm register with a 128 bit value from memory.
	if (memReg != REG_R15)
	{
		// only R15 is implemented as base register
		assert_dbg();
		return;
	}
	const int modrm = 0x87 + ((xmmRegisterDest & 7) << 3); // mod=10 (disp32), rm=111
	// unlike the sibling *_mem128Reg64 emitters this one passes the base register to the two-register prefix helper
	x64Gen_genSSEVEXPrefix2(x64GenContext, memReg, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x56);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU32(x64GenContext, static_cast<uint32>(memImmS32));
}
void x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32)
{
	// SSE1: XORPS <xmm>, [R15+<imm32>]   (0F 57 /r)
	// Bitwise XOR of the xmm register with a 128 bit value from memory.
	if (memReg != REG_R15)
	{
		// only R15 is implemented as base register
		assert_dbg();
		return;
	}
	const int modrm = 0x87 + ((xmmRegisterDest & 7) << 3); // mod=10 (disp32), rm=111
	x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); // todo: should this be x64Gen_genSSEVEXPrefix2() with memReg?
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x57);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU32(x64GenContext, static_cast<uint32>(memImmS32));
}
void x64Gen_andpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE2: ANDPD <xmm>, [R14+<imm32>]   (66 REX 0F 54 /r)
	// Only R14 is implemented as base register.
	if (memRegister != REG_R14)
	{
		// absolute addressing (REG_NONE) and all other base registers are unsupported
		assert_dbg();
		return;
	}
	// REX.B is always required for R14, REX.R is added for xmm8..xmm15
	const int rexPrefix = (xmmRegister >= 8) ? 0x45 : 0x41;
	const int modrm = 0x86 + ((xmmRegister & 7) << 3); // mod=10 (disp32), rm=110
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_writeU8(x64GenContext, rexPrefix);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x54);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU32(x64GenContext, memImmU32);
}
void x64Gen_andps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32)
{
	// SSE1: ANDPS <xmm>, [R15+<imm32>]   (0F 54 /r)
	// Bitwise AND of the xmm register with a 128 bit value from memory.
	if (memReg != REG_R15)
	{
		// only R15 is implemented as base register
		assert_dbg();
		return;
	}
	const int modrm = 0x87 + ((xmmRegisterDest & 7) << 3); // mod=10 (disp32), rm=111
	x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); // todo: should this be x64Gen_genSSEVEXPrefix2() with memReg?
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x54);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU32(x64GenContext, static_cast<uint32>(memImmS32));
}
void x64Gen_andps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE1: ANDPS <xmmDest>, <xmmSrc>   (0F 54 /r)
	// Bitwise AND of two xmm registers.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x54);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32)
{
	// SSE2: PCMPEQD <xmm>, [R15+<imm32>]   (66 0F 76 /r)
	// Compares packed 32 bit integers for equality.
	if (memReg != REG_R15)
	{
		// only R15 is implemented as base register
		assert_dbg();
		return;
	}
	const int modrm = 0x87 + ((xmmRegisterDest & 7) << 3); // mod=10 (disp32), rm=111
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x76);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU32(x64GenContext, static_cast<uint32>(memImmS32));
}
void x64Gen_cvttpd2dq_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: CVTTPD2DQ <xmmDest>, <xmmSrc>   (66 0F E6 /r)
	// Truncating conversion of two doubles into two 32 bit integers placed in the
	// low half of the destination; the upper 64 bits of the destination are zeroed.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0xE6);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc)
{
	// SSE2: CVTTSD2SI <reg32>, <xmmSrc>   (F2 0F 2C /r)
	// Truncating conversion of the low double into a general purpose register
	// (despite the function name the destination is not an xmm register).
	const int modrm = 0xC0 | ((registerDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, registerDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2C);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: CVTSD2SS <xmmDest>, <xmmSrc>   (F2 0F 5A /r)
	// Converts the low 64 bit double into a 32 bit single in the low dword of the destination.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5A);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: CVTPD2PS <xmmDest>, <xmmSrc>   (66 0F 5A /r)
	// Converts two 64 bit doubles into two 32 bit singles in the low half of the destination.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5A);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: CVTPS2PD <xmmDest>, <xmmSrc>   (0F 5A /r, no mandatory prefix)
	// Converts two 32 bit singles into two 64 bit doubles.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5A);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: CVTSS2SD <xmmDest>, <xmmSrc>   (F3 0F 5A /r)
	// Converts the low 32 bit single into a 64 bit double in the low half of the destination.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF3);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5A);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_cvtpi2pd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 memReg, sint32 memImmS32)
{
	// SSE2: CVTPI2PD <xmm>, [RSP+<imm32>]   (66 0F 2A /r)
	// Converts two signed 32 bit integers from memory into two doubles.
	if (memReg != REG_RSP)
	{
		// only RSP is implemented as base register
		assert_dbg();
		return;
	}
	const int modrm = 0x84 + ((xmmRegisterDest & 7) << 3); // mod=10 (disp32), rm=100 (SIB follows)
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2A);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU8(x64GenContext, 0x24); // SIB: no index, base = RSP
	x64Gen_writeU32(x64GenContext, static_cast<uint32>(memImmS32));
}
void x64Gen_cvtsd2si_reg64Low_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc)
{
	// SSE2: CVTSD2SI <reg32>, <xmmSrc>   (F2 0F 2D /r)
	// Converts the low double into a signed 32 bit integer in a general purpose register,
	// rounding is controlled by the current floating-point rounding mode.
	const int modrm = 0xC0 | ((registerDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, registerDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2D);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc)
{
	// SSE2: CVTTSD2SI <reg32>, <xmmSrc>   (F2 0F 2C /r)
	// Converts the low double into a signed 32 bit integer in a general purpose register,
	// always truncating.
	const int modrm = 0xC0 | ((registerDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, registerDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2C);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_sqrtsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: SQRTSD <xmmDest>, <xmmSrc>   (F2 0F 51 /r)
	// Square root of the low double.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x51);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_sqrtpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2: SQRTPD <xmmDest>, <xmmSrc>   (66 0F 51 /r)
	// Square root of both packed doubles.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x51);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_rcpss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE1: RCPSS <xmmDest>, <xmmSrc>   (F3 0F 53 /r)
	// Approximate reciprocal of the low 32 bit single.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7); // mod=11, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF3);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x53);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_mulss_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE1: MULSS <xmm>, [R15+<imm32>]   (F3 REX 0F 59 /r)
	// Multiplies the low single of the xmm register with a single from memory.
	// Only R15 is implemented as base register.
	if (memRegister == REG_NONE)
	{
		// absolute addressing is not implemented
		assert_dbg();
	}
	else if (memRegister == REG_R15) // named constant instead of the literal 15, matching the sibling emitters
	{
		x64Gen_writeU8(x64GenContext, 0xF3);
		// REX.B is always required for R15, REX.R is added for xmm8..xmm15
		x64Gen_writeU8(x64GenContext, (xmmRegister < 8) ? 0x41 : 0x45);
		x64Gen_writeU8(x64GenContext, 0x0F);
		x64Gen_writeU8(x64GenContext, 0x59);
		x64Gen_writeU8(x64GenContext, 0x87 + (xmmRegister & 7) * 8); // mod=10 (disp32), rm=111
		x64Gen_writeU32(x64GenContext, memImmU32);
	}
	else
	{
		// no other base register is implemented
		assert_dbg();
	}
}
void x64Gen_movd_xmmReg_reg64Low32(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc)
{
	// SSE2: MOVD <xmm>, <reg32>   (66 0F 6E /r)
	// Copies the low 32 bits of a general purpose register into an xmm register.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (registerSrc & 7); // mod=11, reg=xmm, rm=gpr
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, registerSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x6E);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_movd_reg64Low32_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc)
{
	// SSE2: MOVD <reg32>, <xmm>   (66 0F 7E /r)
	// Copies the low 32 bits of an xmm register into a general purpose register.
	const int modrm = 0xC0 | ((xmmRegisterSrc & 7) << 3) | (registerDest & 7); // mod=11, reg=xmm, rm=gpr
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, registerDest, xmmRegisterSrc, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x7E);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_movq_xmmReg_reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc)
{
	// SSE2: MOVQ <xmm>, <reg64>   (66 REX.W 0F 6E /r)
	// Copies a 64 bit general purpose register into an xmm register.
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (registerSrc & 7); // mod=11, reg=xmm, rm=gpr
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, registerSrc, xmmRegisterDest, true); // true: request the 64 bit operand size
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x6E);
	x64Gen_writeU8(x64GenContext, modrm);
}
void x64Gen_movq_reg64_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 xmmRegisterSrc)
{
	// SSE2: MOVQ <reg64>, <xmm>   (66 REX.W 0F 7E /r)
	// Copies the low 64 bits of an xmm register into a general purpose register.
	const int modrm = 0xC0 | ((xmmRegisterSrc & 7) << 3) | (registerDst & 7); // mod=11, reg=xmm, rm=gpr
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, registerDst, xmmRegisterSrc, true); // true: request the 64 bit operand size
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x7E);
	x64Gen_writeU8(x64GenContext, modrm);
}

View file

@ -0,0 +1,360 @@
// Opcode descriptor for a single-byte opcode whose operands are encoded in the
// order they are passed (left operand in ModRM.reg, see x64Gen_writeMODRM_dyn).
template<uint8 op0, bool rex64Bit = false>
class x64_opc_1byte
{
public:
	// operands are not swapped before encoding
	static constexpr bool isRevOrder() { return false; }

	// whether the REX.W bit has to be set for this opcode
	static constexpr bool hasRex64BitPrefix() { return rex64Bit; }

	// writes the opcode byte to the code buffer
	static void emitBytes(x64GenContext_t* x64GenContext)
	{
		x64Gen_writeU8(x64GenContext, op0);
	}
};
// Opcode descriptor for a single-byte opcode whose operands are swapped before
// encoding (right operand ends up in ModRM.reg, see x64Gen_writeMODRM_dyn).
template<uint8 op0, bool rex64Bit = false>
class x64_opc_1byte_rev
{
public:
	// operands are swapped before encoding
	static constexpr bool isRevOrder() { return true; }

	// whether the REX.W bit has to be set for this opcode
	static constexpr bool hasRex64BitPrefix() { return rex64Bit; }

	// writes the opcode byte to the code buffer
	static void emitBytes(x64GenContext_t* x64GenContext)
	{
		x64Gen_writeU8(x64GenContext, op0);
	}
};
// Opcode descriptor for a two-byte opcode (op0 followed by op1) whose operands
// are encoded in the order they are passed.
template<uint8 op0, uint8 op1, bool rex64Bit = false>
class x64_opc_2byte
{
public:
	// operands are not swapped before encoding
	static constexpr bool isRevOrder() { return false; }

	// whether the REX.W bit has to be set for this opcode
	static constexpr bool hasRex64BitPrefix() { return rex64Bit; }

	// writes both opcode bytes to the code buffer, op0 first
	static void emitBytes(x64GenContext_t* x64GenContext)
	{
		x64Gen_writeU8(x64GenContext, op0);
		x64Gen_writeU8(x64GenContext, op1);
	}
};
// Operand category reported by the getType() function of the ModRM operand wrapper classes.
enum class MODRM_OPR_TYPE
{
	REG = 0, // operand is a register
	MEM = 1, // operand is a memory reference
};
// ModRM operand wrapper describing a plain register operand.
class x64MODRM_opr_reg64
{
public:
	// reg: register index; the encoder uses bit 3 for the REX prefix and the low 3 bits for ModRM
	x64MODRM_opr_reg64(uint8 reg) : reg(reg)
	{
	}

	static constexpr MODRM_OPR_TYPE getType()
	{
		return MODRM_OPR_TYPE::REG;
	}

	const uint8 getReg() const
	{
		return this->reg;
	}

private:
	uint8 reg;
};
// ModRM operand wrapper describing a memory operand of the form [reg + offset].
class x64MODRM_opr_memReg64
{
public:
	// [reg] without displacement
	x64MODRM_opr_memReg64(uint8 reg) : x64MODRM_opr_memReg64(reg, 0)
	{
	}

	// [reg + offset]
	x64MODRM_opr_memReg64(uint8 reg, sint32 offset) : reg(reg), offset(offset)
	{
	}

	static constexpr MODRM_OPR_TYPE getType()
	{
		return MODRM_OPR_TYPE::MEM;
	}

	// this operand form always has a base register and never an index register
	static constexpr bool hasBaseReg()
	{
		return true;
	}

	static constexpr bool hasIndexReg()
	{
		return false;
	}

	const uint8 getBaseReg() const
	{
		return this->reg;
	}

	// signed displacement reinterpreted as unsigned 32 bit value
	const uint32 getOffset() const
	{
		return (uint32)this->offset;
	}

private:
	uint8 reg;
	sint32 offset;
};
// ModRM operand wrapper describing a memory operand of the form
// [regBase + regIndex*1 + offset]. The scale factor is fixed to 1.
class x64MODRM_opr_memRegPlusReg
{
public:
	// [regBase + regIndex], no displacement
	// Delegates to the three-argument constructor so both share the same swap/validation logic.
	// (Previously this constructor asserted on regBase after the swap, which by construction
	// always held the RSP/R12 encoding at that point and therefore always failed.)
	x64MODRM_opr_memRegPlusReg(uint8 regBase, uint8 regIndex)
		: x64MODRM_opr_memRegPlusReg(regBase, regIndex, 0)
	{
	}

	// [regBase + regIndex + offset]
	x64MODRM_opr_memRegPlusReg(uint8 regBase, uint8 regIndex, sint32 offset)
	{
		if ((regIndex & 7) == 4)
		{
			// can't encode RSP/R12 in the index register, switch with the base register
			// this only works because the scale is 1
			std::swap(regBase, regIndex);
			// after the swap the new index (former base) must be encodable
			cemu_assert((regIndex & 7) != 4);
		}
		this->regBase = regBase;
		this->regIndex = regIndex;
		this->offset = offset;
	}

	static constexpr MODRM_OPR_TYPE getType()
	{
		return MODRM_OPR_TYPE::MEM;
	}

	const uint8 getBaseReg() const
	{
		return regBase;
	}

	// const-qualified like the other accessors so it can be called on const operands
	const uint8 getIndexReg() const
	{
		return regIndex;
	}

	// signed displacement reinterpreted as unsigned 32 bit value
	const uint32 getOffset() const
	{
		return (uint32)offset;
	}

	static constexpr bool hasBaseReg()
	{
		return true;
	}

	static constexpr bool hasIndexReg()
	{
		return true;
	}

private:
	uint8 regBase;
	uint8 regIndex; // multiplied by the scale, which is fixed to 1
	sint32 offset;
};
// Emits one instruction consisting of: optional REX prefix, the opcode bytes provided by
// opcodeBytes::emitBytes(), the ModRM byte and, for memory operands, a SIB byte and displacement.
// opA must be a register operand (enforced by static_assert) and is encoded in ModRM.reg.
// opB is either a register operand (encoded in ModRM.rm) or a memory operand with a base
// register, an optional index register (scale fixed to 1) and a 32 bit offset.
template<class opcodeBytes, typename TA, typename TB>
void _x64Gen_writeMODRM_internal(x64GenContext_t* x64GenContext, TA opA, TB opB)
{
static_assert(TA::getType() == MODRM_OPR_TYPE::REG);
x64Gen_checkBuffer(x64GenContext);
// REX prefix
// 0100 WRXB
// the prefix is only emitted when at least one of its bits is needed
if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::REG)
{
if (opA.getReg() & 8 || opB.getReg() & 8 || opcodeBytes::hasRex64BitPrefix())
{
// opA (ModRM.reg) -> REX.R (bit 2)
// opB (ModRM.rm) -> REX.B (bit 0)
x64Gen_writeU8(x64GenContext, 0x40 | ((opA.getReg() & 8) ? (1 << 2) : 0) | ((opB.getReg() & 8) ? (1 << 0) : 0) | (opcodeBytes::hasRex64BitPrefix() ? (1 << 3) : 0));
}
}
else if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::MEM)
{
if constexpr (opB.hasBaseReg() && opB.hasIndexReg())
{
if (opA.getReg() & 8 || opB.getBaseReg() & 8 || opB.getIndexReg() & 8 || opcodeBytes::hasRex64BitPrefix())
{
// opA (ModRM.reg) -> REX.R (bit 2)
// baseReg -> REX.B (bit 0)
// indexReg -> REX.X (bit 1)
x64Gen_writeU8(x64GenContext, 0x40 | ((opA.getReg() & 8) ? (1 << 2) : 0) | ((opB.getBaseReg() & 8) ? (1 << 0) : 0) | ((opB.getIndexReg() & 8) ? (1 << 1) : 0) | (opcodeBytes::hasRex64BitPrefix() ? (1 << 3) : 0));
}
}
else if constexpr (opB.hasBaseReg())
{
if (opA.getReg() & 8 || opB.getBaseReg() & 8 || opcodeBytes::hasRex64BitPrefix())
{
// opA (ModRM.reg) -> REX.R (bit 2)
// baseReg -> REX.B (bit 0)
x64Gen_writeU8(x64GenContext, 0x40 | ((opA.getReg() & 8) ? (1 << 2) : 0) | ((opB.getBaseReg() & 8) ? (1 << 0) : 0) | (opcodeBytes::hasRex64BitPrefix() ? (1 << 3) : 0));
}
}
else
{
if (opA.getReg() & 8 || opcodeBytes::hasRex64BitPrefix())
{
// todo - verify
// opA (ModRM.reg) -> REX.R (bit 2)
x64Gen_writeU8(x64GenContext, 0x40 | ((opA.getReg() & 8) ? (1 << 2) : 0) | (opcodeBytes::hasRex64BitPrefix() ? (1 << 3) : 0));
}
}
}
// opcode
opcodeBytes::emitBytes(x64GenContext);
// modrm byte
if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::REG)
{
// reg, reg (mod=11, reg=opA, rm=opB)
x64Gen_writeU8(x64GenContext, 0xC0 + (opB.getReg() & 7) + ((opA.getReg() & 7) << 3));
}
else if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::MEM)
{
if constexpr (TB::hasBaseReg() == false) // todo - also check for index reg and secondary sib reg
{
// form: [offset]
// instruction is just offset
// not implemented
cemu_assert(false);
}
else if constexpr (TB::hasIndexReg())
{
// form: [base+index*scale+offset], scale is currently fixed to 1
// rm=100 in the ModRM byte selects the SIB form
cemu_assert((opB.getIndexReg() & 7) != 4); // RSP not allowed as index register
const uint32 offset = opB.getOffset();
if (offset == 0 && (opB.getBaseReg() & 7) != 5) // RBP/R13 has special meaning in no-offset encoding
{
// [form: index*1+base]
x64Gen_writeU8(x64GenContext, 0x00 + (4) + ((opA.getReg() & 7) << 3));
// SIB byte
x64Gen_writeU8(x64GenContext, ((opB.getIndexReg()&7) << 3) + (opB.getBaseReg() & 7));
}
else if (offset == (uint32)(sint32)(sint8)offset) // offset survives a round trip through a signed byte
{
// [form: index*1+base+sbyte]
x64Gen_writeU8(x64GenContext, 0x40 + (4) + ((opA.getReg() & 7) << 3));
// SIB byte
x64Gen_writeU8(x64GenContext, ((opB.getIndexReg() & 7) << 3) + (opB.getBaseReg() & 7));
x64Gen_writeU8(x64GenContext, (uint8)offset);
}
else
{
// [form: index*1+base+sdword]
x64Gen_writeU8(x64GenContext, 0x80 + (4) + ((opA.getReg() & 7) << 3));
// SIB byte
x64Gen_writeU8(x64GenContext, ((opB.getIndexReg() & 7) << 3) + (opB.getBaseReg() & 7));
x64Gen_writeU32(x64GenContext, (uint32)offset);
}
}
else
{
// form: [baseReg + offset]
const uint32 offset = opB.getOffset();
if (offset == 0 && (opB.getBaseReg() & 7) != 5) // RBP/R13 has special meaning in no-offset encoding
{
// form: [baseReg]
// if base reg is RSP/R12 we need to use SIB form of instruction
if ((opB.getBaseReg() & 7) == 4)
{
x64Gen_writeU8(x64GenContext, 0x00 + (4) + ((opA.getReg() & 7) << 3));
// SIB byte [form: none*1+base]
x64Gen_writeU8(x64GenContext, (4 << 3) + (opB.getBaseReg() & 7));
}
else
{
x64Gen_writeU8(x64GenContext, 0x00 + (opB.getBaseReg() & 7) + ((opA.getReg() & 7) << 3));
}
}
else if (offset == (uint32)(sint32)(sint8)offset) // offset survives a round trip through a signed byte
{
// form: [baseReg+sbyte]
// if base reg is RSP/R12 we need to use SIB form of instruction
if ((opB.getBaseReg() & 7) == 4)
{
x64Gen_writeU8(x64GenContext, 0x40 + (4) + ((opA.getReg() & 7) << 3));
// SIB byte [form: none*1+base]
x64Gen_writeU8(x64GenContext, (4 << 3) + (opB.getBaseReg() & 7));
}
else
{
x64Gen_writeU8(x64GenContext, 0x40 + (opB.getBaseReg() & 7) + ((opA.getReg() & 7) << 3));
}
x64Gen_writeU8(x64GenContext, (uint8)offset);
}
else
{
// form: [baseReg+sdword]
// if base reg is RSP/R12 we need to use SIB form of instruction
if ((opB.getBaseReg() & 7) == 4)
{
x64Gen_writeU8(x64GenContext, 0x80 + (4) + ((opA.getReg() & 7) << 3));
// SIB byte [form: none*1+base]
x64Gen_writeU8(x64GenContext, (4 << 3) + (opB.getBaseReg() & 7));
}
else
{
x64Gen_writeU8(x64GenContext, 0x80 + (opB.getBaseReg() & 7) + ((opA.getReg() & 7) << 3));
}
x64Gen_writeU32(x64GenContext, (uint32)offset);
}
}
}
else
{
// unsupported operand type combination
assert_dbg();
}
}
// Public entry point of the ModRM encoder. Forwards both operands to
// _x64Gen_writeMODRM_internal, exchanging them first when the opcode descriptor
// reports reversed operand order via isRevOrder().
template<class opcodeBytes, typename TA, typename TB>
void x64Gen_writeMODRM_dyn(x64GenContext_t* x64GenContext, TA opLeft, TB opRight)
{
	if constexpr (!opcodeBytes::isRevOrder())
	{
		// natural order: left operand is passed first
		_x64Gen_writeMODRM_internal<opcodeBytes, TA, TB>(x64GenContext, opLeft, opRight);
	}
	else
	{
		// reversed order: right operand is passed first
		_x64Gen_writeMODRM_internal<opcodeBytes, TB, TA>(x64GenContext, opRight, opLeft);
	}
}