mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-07-14 10:48:28 +12:00
Add all the files
This commit is contained in:
parent
e3db07a16a
commit
d60742f52b
1445 changed files with 430238 additions and 0 deletions
11
src/Cafe/HW/Espresso/Const.h
Normal file
11
src/Cafe/HW/Espresso/Const.h
Normal file
|
@ -0,0 +1,11 @@
|
|||
#pragma once
|
||||
|
||||
namespace Espresso
|
||||
{
|
||||
constexpr inline int CORE_COUNT = 3;
|
||||
|
||||
constexpr inline uint64 CORE_CLOCK = 1243125000;
|
||||
constexpr inline uint64 BUS_CLOCK = 248625000;
|
||||
constexpr inline uint64 TIMER_CLOCK = BUS_CLOCK / 4;
|
||||
|
||||
};
|
5
src/Cafe/HW/Espresso/Debugger/DebugSymbolStorage.cpp
Normal file
5
src/Cafe/HW/Espresso/Debugger/DebugSymbolStorage.cpp
Normal file
|
@ -0,0 +1,5 @@
|
|||
#include "Common/precompiled.h"
|
||||
#include "DebugSymbolStorage.h"
|
||||
|
||||
FSpinlock DebugSymbolStorage::s_lock;
|
||||
std::unordered_map<MPTR, DEBUG_SYMBOL_TYPE> DebugSymbolStorage::s_typeStorage;
|
63
src/Cafe/HW/Espresso/Debugger/DebugSymbolStorage.h
Normal file
63
src/Cafe/HW/Espresso/Debugger/DebugSymbolStorage.h
Normal file
|
@ -0,0 +1,63 @@
|
|||
#pragma once
|
||||
#include "util/helpers/fspinlock.h"
|
||||
|
||||
// Kind of annotation that can be attached to an address in DebugSymbolStorage.
// The numeric values are spelled out so that they stay stable if entries are reordered.
enum class DEBUG_SYMBOL_TYPE
{
	UNDEFINED = 0, // no annotation stored
	CODE = 1,
	// big-endian types
	U64 = 2,
	U32 = 3,
	U16 = 4,
	U8 = 5,
	S64 = 6,
	S32 = 7,
	S16 = 8,
	S8 = 9,
	FLOAT = 10,
	DOUBLE = 11,
};
|
||||
|
||||
|
||||
class DebugSymbolStorage
|
||||
{
|
||||
public:
|
||||
static void StoreDataType(MPTR address, DEBUG_SYMBOL_TYPE type)
|
||||
{
|
||||
s_lock.acquire();
|
||||
s_typeStorage[address] = type;
|
||||
s_lock.release();
|
||||
}
|
||||
|
||||
static DEBUG_SYMBOL_TYPE GetDataType(MPTR address)
|
||||
{
|
||||
s_lock.acquire();
|
||||
auto itr = s_typeStorage.find(address);
|
||||
if (itr == s_typeStorage.end())
|
||||
{
|
||||
s_lock.release();
|
||||
return DEBUG_SYMBOL_TYPE::UNDEFINED;
|
||||
}
|
||||
DEBUG_SYMBOL_TYPE t = itr->second;
|
||||
s_lock.release();
|
||||
return t;
|
||||
}
|
||||
|
||||
static void ClearRange(MPTR address, uint32 length)
|
||||
{
|
||||
s_lock.acquire();
|
||||
while (length > 0)
|
||||
{
|
||||
auto itr = s_typeStorage.find(address);
|
||||
if (itr != s_typeStorage.end())
|
||||
s_typeStorage.erase(itr);
|
||||
address += 4;
|
||||
length -= 4;
|
||||
}
|
||||
s_lock.release();
|
||||
}
|
||||
|
||||
private:
|
||||
static FSpinlock s_lock;
|
||||
static std::unordered_map<MPTR, DEBUG_SYMBOL_TYPE> s_typeStorage;
|
||||
};
|
573
src/Cafe/HW/Espresso/Debugger/Debugger.cpp
Normal file
573
src/Cafe/HW/Espresso/Debugger/Debugger.cpp
Normal file
|
@ -0,0 +1,573 @@
|
|||
#include "gui/guiWrapper.h"
|
||||
#include "Debugger.h"
|
||||
#include "Cemu/PPCAssembler/ppcAssembler.h"
|
||||
#include "Cafe/HW/Espresso/Recompiler/PPCRecompiler.h"
|
||||
#include "Cemu/ExpressionParser/ExpressionParser.h"
|
||||
|
||||
#include "gui/debugger/DebuggerWindow2.h"
|
||||
|
||||
#include "Cafe/OS/libs/coreinit/coreinit.h"
|
||||
|
||||
#if BOOST_OS_WINDOWS > 0
|
||||
#include <Windows.h>
|
||||
#endif
|
||||
|
||||
debuggerState_t debuggerState{ };
|
||||
|
||||
// Returns the head of the breakpoint chain registered for the given address,
// or nullptr if no chain exists for it.
DebuggerBreakpoint* debugger_getFirstBP(uint32 address)
{
	for (DebuggerBreakpoint* chainHead : debuggerState.breakpoints)
	{
		if (chainHead->address != address)
			continue;
		return chainHead;
	}
	return nullptr;
}
|
||||
|
||||
// Returns the first breakpoint of the given type in the chain registered for the address,
// or nullptr if there is no chain or the chain holds no breakpoint of that type.
DebuggerBreakpoint* debugger_getFirstBP(uint32 address, uint8 bpType)
{
	// the single-argument overload yields the chain head (or nullptr); walk the chain from there
	for (DebuggerBreakpoint* node = debugger_getFirstBP(address); node != nullptr; node = node->next)
	{
		if (node->bpType == bpType)
			return node;
	}
	return nullptr;
}
|
||||
|
||||
// Returns true if the chain starting at bp (may be nullptr) contains a breakpoint of the given type.
bool debuggerBPChain_hasType(DebuggerBreakpoint* bp, uint8 bpType)
{
	for (DebuggerBreakpoint* node = bp; node != nullptr; node = node->next)
	{
		if (node->bpType == bpType)
			return true;
	}
	return false;
}
|
||||
|
||||
// Appends bp to the end of the breakpoint chain for the address.
// If no chain exists yet, bp becomes the head of a new chain.
void debuggerBPChain_add(uint32 address, DebuggerBreakpoint* bp)
{
	bp->next = nullptr;
	DebuggerBreakpoint* tail = debugger_getFirstBP(address);
	if (tail == nullptr)
	{
		// no breakpoint chain exists for this address
		debuggerState.breakpoints.push_back(bp);
		return;
	}
	// advance to the last element and link the new breakpoint behind it
	for (; tail->next != nullptr; tail = tail->next)
	{
	}
	tail->next = bp;
}
|
||||
|
||||
// Returns the opcode that belongs at the address when no breakpoint is applied:
// the value saved by the first execute breakpoint (normal or one-shot) in the chain,
// otherwise whatever memory currently holds.
uint32 debugger_getAddressOriginalOpcode(uint32 address)
{
	for (DebuggerBreakpoint* node = debugger_getFirstBP(address); node != nullptr; node = node->next)
	{
		if (node->isExecuteBP())
			return node->originalOpcodeValue;
	}
	return memory_readU32(address);
}
|
||||
|
||||
// Writes a 32-bit value to memory and, if the stored value actually changed,
// invalidates the recompiler range covering that word.
void debugger_updateMemoryU32(uint32 address, uint32 newValue)
{
	const bool valueDiffers = (memory_readU32(address) != newValue);
	memory_writeU32(address, newValue);
	if (!valueDiffers)
		return;
	PPCRecompiler_invalidateRange(address, address + 4);
}
|
||||
|
||||
// Synchronizes the instruction word at the address with the execute breakpoints registered for it.
// - If an enabled execute breakpoint exists and forceRestore is false, the TW instruction is written.
// - Otherwise, if any execute breakpoint exists, the saved original opcode is written back
//   (the value of the last execute breakpoint in the chain is used).
// - If the chain has no execute breakpoint, memory is left untouched.
void debugger_updateExecutionBreakpoint(uint32 address, bool forceRestore)
{
	constexpr uint32 twOpcode = (31 << 26) | (4 << 1);
	bool foundExecuteBP = false;
	uint32 opcodeToRestore = 0;
	for (DebuggerBreakpoint* node = debugger_getFirstBP(address); node != nullptr; node = node->next)
	{
		if (!node->isExecuteBP())
			continue;
		if (node->enabled && !forceRestore)
		{
			// write TW instruction to memory
			debugger_updateMemoryU32(address, twOpcode);
			return;
		}
		opcodeToRestore = node->originalOpcodeValue;
		foundExecuteBP = true;
	}
	// restore instruction
	if (foundExecuteBP)
		debugger_updateMemoryU32(address, opcodeToRestore);
}
|
||||
|
||||
// Creates an enabled normal execute breakpoint at the address, unless the chain already has one.
void debugger_createExecuteBreakpoint(uint32 address)
{
	// debuggerBPChain_hasType accepts a null chain, so a missing chain simply means "not present"
	if (debuggerBPChain_hasType(debugger_getFirstBP(address), DEBUGGER_BP_T_NORMAL))
		return; // breakpoint already exists
	// remember the opcode the breakpoint will replace
	const uint32 savedOpcode = debugger_getAddressOriginalOpcode(address);
	DebuggerBreakpoint* newBP = new DebuggerBreakpoint(address, savedOpcode, DEBUGGER_BP_T_NORMAL, true);
	debuggerBPChain_add(address, newBP);
	// apply the breakpoint to memory
	debugger_updateExecutionBreakpoint(address);
}
|
||||
|
||||
// Creates an enabled one-shot execute breakpoint at the address, unless the chain already has one.
void debugger_createSingleShotExecuteBreakpoint(uint32 address)
{
	// debuggerBPChain_hasType accepts a null chain, so a missing chain simply means "not present"
	if (debuggerBPChain_hasType(debugger_getFirstBP(address), DEBUGGER_BP_T_ONE_SHOT))
		return; // breakpoint already exists
	// remember the opcode the breakpoint will replace
	const uint32 savedOpcode = debugger_getAddressOriginalOpcode(address);
	DebuggerBreakpoint* newBP = new DebuggerBreakpoint(address, savedOpcode, DEBUGGER_BP_T_ONE_SHOT, true);
	debuggerBPChain_add(address, newBP);
	// apply the breakpoint to memory
	debugger_updateExecutionBreakpoint(address);
}
|
||||
|
||||
namespace coreinit
|
||||
{
|
||||
std::vector<std::thread::native_handle_type>& OSGetSchedulerThreads();
|
||||
}
|
||||
|
||||
// Makes bp the active memory breakpoint (nullptr deactivates memory breakpoints).
// On Windows builds the debug registers of every scheduler thread are reprogrammed accordingly;
// on other builds only a log message is emitted and no state is changed.
void debugger_updateMemoryBreakpoint(DebuggerBreakpoint* bp)
{
	// operate on a copy of the scheduler thread handle list
	std::vector<std::thread::native_handle_type> threadHandles = coreinit::OSGetSchedulerThreads();

#if BOOST_OS_WINDOWS > 0
	debuggerState.activeMemoryBreakpoint = bp;
	for (auto& nativeHandle : threadHandles)
	{
		HANDLE threadHandle = (HANDLE)nativeHandle;
		CONTEXT threadCtx{};
		threadCtx.ContextFlags = CONTEXT_DEBUG_REGISTERS;
		// the thread is suspended while its debug registers are read and rewritten
		SuspendThread(threadHandle);
		GetThreadContext(threadHandle, &threadCtx);
		if (!debuggerState.activeMemoryBreakpoint)
		{
			// no active memory breakpoint: clear both address registers and the control register
			threadCtx.Dr0 = (DWORD64)0;
			threadCtx.Dr1 = (DWORD64)0;
			threadCtx.Dr7 = 0; // disables dr0 and dr1
		}
		else
		{
			// both registers watch the same host address of the breakpoint
			threadCtx.Dr0 = (DWORD64)memory_getPointerFromVirtualOffset(bp->address);
			threadCtx.Dr1 = (DWORD64)memory_getPointerFromVirtualOffset(bp->address);
			threadCtx.Dr7 = 1 | (1 << 16) | (3 << 18); // enable dr0, track write, 4 byte length
			threadCtx.Dr7 |= (4 | (3 << 20) | (3 << 22)); // enable dr1, track read+write, 4 byte length
		}
		SetThreadContext(threadHandle, &threadCtx);
		ResumeThread(threadHandle);
	}
#else
	cemuLog_log(LogType::Force, "Debugger breakpoints are not supported");
#endif
}
|
||||
|
||||
// Handles a hardware debug-register hit. drMask bit 0 reports DR0 (programmed for writes),
// bit 1 reports DR1 (programmed for reads and writes).
// If the hit matches the type of the active memory breakpoint, a one-shot execute breakpoint
// is placed on the instruction following the current interpreter instruction pointer.
void debugger_handleSingleStepException(uint32 drMask)
{
	const bool hitDR0 = (drMask & (1 << 0)) != 0; // write
	const bool hitDR1 = (drMask & (1 << 1)) != 0; // read
	const DebuggerBreakpoint* activeBP = debuggerState.activeMemoryBreakpoint;
	if (activeBP == nullptr)
		return;
	// both registers firing means a write (and read) access, anything else is treated as a read access
	const uint8 matchingType = (hitDR0 && hitDR1) ? DEBUGGER_BP_T_MEMORY_WRITE : DEBUGGER_BP_T_MEMORY_READ;
	if (activeBP->bpType != matchingType)
		return;
	debugger_createSingleShotExecuteBreakpoint(ppcInterpreterCurrentInstance->instructionPointer + 4);
}
|
||||
|
||||
// Creates a memory breakpoint at the address and makes it the active one.
// onRead selects a read breakpoint; otherwise a write breakpoint is created.
// Requesting read and write at the same time is not supported: the request is rejected
// (debug assert) and no breakpoint is created.
// Any previously active memory breakpoint is disabled, since only one can be active at a time.
void debugger_createMemoryBreakpoint(uint32 address, bool onRead, bool onWrite)
{
	if (onRead && onWrite)
	{
		// bail out instead of continuing with an uninitialized breakpoint type
		assert_dbg();
		return;
	}
	const uint8 bpType = onRead ? DEBUGGER_BP_T_MEMORY_READ : DEBUGGER_BP_T_MEMORY_WRITE;

	// init breakpoint object (memory breakpoints have no original opcode, 0xFFFFFFFF is a placeholder)
	DebuggerBreakpoint* bp = new DebuggerBreakpoint(address, 0xFFFFFFFF, bpType, true);
	debuggerBPChain_add(address, bp);
	// disable any already existing memory breakpoint
	if (debuggerState.activeMemoryBreakpoint)
	{
		debuggerState.activeMemoryBreakpoint->enabled = false;
		debuggerState.activeMemoryBreakpoint = nullptr;
	}
	// activate new breakpoint
	debugger_updateMemoryBreakpoint(bp);
}
|
||||
|
||||
// Places an execute breakpoint at the given address if break-on-entry is enabled.
void debugger_handleEntryBreakpoint(uint32 address)
{
	if (debuggerState.breakOnEntry)
		debugger_createExecuteBreakpoint(address);
}
|
||||
|
||||
// Unlinks bp from the breakpoint chain of its address and frees it.
// - For execute breakpoints the instruction word in memory is refreshed first, so the original
//   opcode is restored unless another enabled execute breakpoint remains at the address.
// - If bp is the active memory breakpoint, memory breakpoints are deactivated so that no
//   pointer to the freed object remains.
// - If bp is not part of the chain registered for its address, nothing is unlinked or freed.
void debugger_deleteBreakpoint(DebuggerBreakpoint* bp)
{
	for (auto& it : debuggerState.breakpoints)
	{
		if (it->address != bp->address)
			continue;
		// for execution breakpoints make sure the instruction is restored
		if (bp->isExecuteBP())
		{
			bp->enabled = false;
			debugger_updateExecutionBreakpoint(bp->address);
		}
		if (it == bp)
		{
			// bp is the chain head: drop it from the list and promote its successor (if any).
			// 'it' refers into the vector and must not be used after the erase below.
			DebuggerBreakpoint* nextBP = bp->next;
			debuggerState.breakpoints.erase(std::remove(debuggerState.breakpoints.begin(), debuggerState.breakpoints.end(), bp), debuggerState.breakpoints.end());
			if (nextBP)
				debuggerState.breakpoints.push_back(nextBP);
		}
		else
		{
			// bp is further down the chain: find its predecessor
			DebuggerBreakpoint* bpItr = it;
			while (bpItr->next && bpItr->next != bp)
				bpItr = bpItr->next;
			if (bpItr->next != bp)
			{
				// bp is not linked into this chain; refuse to unlink/free an unknown object
				cemu_assert_debug(false);
				return;
			}
			bpItr->next = bp->next;
		}
		// never leave the active memory breakpoint pointing at freed memory
		if (debuggerState.activeMemoryBreakpoint == bp)
		{
			debugger_updateMemoryBreakpoint(nullptr);
			debuggerState.activeMemoryBreakpoint = nullptr;
		}
		delete bp;
		return;
	}
}
|
||||
|
||||
// Removes the normal execute breakpoint at the address if one exists, otherwise creates one.
void debugger_toggleExecuteBreakpoint(uint32 address)
{
	if (DebuggerBreakpoint* normalBP = debugger_getFirstBP(address, DEBUGGER_BP_T_NORMAL))
		debugger_deleteBreakpoint(normalBP); // delete existing breakpoint
	else
		debugger_createExecuteBreakpoint(address); // create new
}
|
||||
|
||||
void debugger_forceBreak()
|
||||
{
|
||||
debuggerState.debugSession.shouldBreak = true;
|
||||
}
|
||||
|
||||
bool debugger_isTrapped()
|
||||
{
|
||||
return debuggerState.debugSession.isTrapped;
|
||||
}
|
||||
|
||||
void debugger_resume()
|
||||
{
|
||||
// if there is a breakpoint on the current instruction then do a single 'step into' to skip it
|
||||
debuggerState.debugSession.run = true;
|
||||
}
|
||||
|
||||
// Enables or disables the breakpoint bp, which must be part of the chain registered for the address
// (otherwise nothing happens).
// - Normal execute breakpoints: the enabled flag is set and the instruction in memory is refreshed.
// - Memory breakpoints: all other memory breakpoints are disabled, then bp is made the active
//   memory breakpoint (state == true) or memory breakpoints are deactivated (state == false).
// - Other breakpoint types are left unchanged.
void debugger_toggleBreakpoint(uint32 address, bool state, DebuggerBreakpoint* bp)
{
	// locate bp inside the chain of this address
	DebuggerBreakpoint* target = debugger_getFirstBP(address);
	while (target != nullptr && target != bp)
		target = target->next;
	if (target == nullptr)
		return;

	if (target->bpType == DEBUGGER_BP_T_NORMAL)
	{
		bp->enabled = state;
		debugger_updateExecutionBreakpoint(address);
		debuggerWindow_updateViewThreadsafe2();
		return;
	}
	if (!target->isMemBP())
		return;
	// disable other memory breakpoints
	for (DebuggerBreakpoint* chainHead : debuggerState.breakpoints)
	{
		for (DebuggerBreakpoint* other = chainHead; other != nullptr; other = other->next)
		{
			if (other != bp && other->isMemBP())
				other->enabled = false;
		}
	}
	target->enabled = state;
	debugger_updateMemoryBreakpoint(state ? target : nullptr);
	debuggerWindow_updateViewThreadsafe2();
}
|
||||
|
||||
// Registers a code patch of patchData.size() bytes at the given address and applies it.
// - address and the patch size are expected to be multiples of 4 (debug asserts); the patch is
//   processed word by word and trailing bytes of an unaligned size are recorded but not applied.
// - The bytes being replaced are saved as the patch's original data. For words that carry an
//   execute breakpoint, the breakpoint's saved opcode is recorded instead of the current memory content.
// - Existing patches whose range touches or overlaps the new one are merged into it; in overlapping
//   regions the new patch bytes win, while the older patch's original data is kept.
// - When applying, words that have an enabled execute breakpoint are not written to memory; the
//   breakpoint's saved opcode is updated instead, so the patch becomes visible when the breakpoint is lifted.
void debugger_createPatch(uint32 address, std::span<uint8> patchData)
{
	const uint32 patchLength = (uint32)patchData.size();
	const uint32 wordCount = patchLength / 4;

	DebuggerPatch* patch = new DebuggerPatch();
	patch->address = address;
	patch->length = patchLength;
	// size the buffers to the patch (a fixed size of 4 would overflow for longer patches)
	patch->data.assign(patchData.begin(), patchData.end());
	patch->origData.resize(patchLength);
	if (patchLength > 0)
		memcpy(patch->origData.data(), memory_getPointerFromVirtualOffset(address), patchLength);
	// get original data from breakpoints
	if ((address & 3) != 0)
		cemu_assert_debug(false);
	for (uint32 i = 0; i < wordCount; i++)
	{
		// look up the breakpoint chain of each patched word, not just of the first one
		for (DebuggerBreakpoint* bpItr = debugger_getFirstBP(address + i * 4); bpItr; bpItr = bpItr->next)
		{
			if (bpItr->isExecuteBP())
			{
				const uint32 savedOpcode = _swapEndianU32(bpItr->originalOpcodeValue);
				memcpy(patch->origData.data() + i * 4, &savedOpcode, sizeof(savedOpcode));
			}
		}
	}
	// merge with existing patches if the ranges touch
	for (sint32 i = 0; i < (sint32)debuggerState.patches.size(); i++)
	{
		DebuggerPatch* patchItr = debuggerState.patches[i];
		if (!(address + patchData.size() >= patchItr->address && address <= patchItr->address + patchItr->length))
			continue;
		const uint32 newAddress = std::min(patch->address, patchItr->address);
		const uint32 newEndAddress = std::max(patch->address + patch->length, patchItr->address + patchItr->length);
		const uint32 newLength = newEndAddress - newAddress;

		DebuggerPatch* newPatch = new DebuggerPatch();
		newPatch->address = newAddress;
		newPatch->length = newLength;
		newPatch->data.resize(newLength);
		newPatch->origData.resize(newLength);
		// offsets are relative to patch->address, which differs from 'address' after a previous merge
		const uint32 patchOffset = patch->address - newAddress;
		const uint32 existingOffset = patchItr->address - newAddress;
		// patched bytes: copy the existing patch first so the newer bytes win where ranges overlap
		if (patchItr->length > 0)
			memcpy(newPatch->data.data() + existingOffset, patchItr->data.data(), patchItr->length);
		if (patch->length > 0)
			memcpy(newPatch->data.data() + patchOffset, patch->data.data(), patch->length);
		// original bytes: the existing patch saved the pre-patch content, so it wins where ranges overlap
		if (patch->length > 0)
			memcpy(newPatch->origData.data() + patchOffset, patch->origData.data(), patch->length);
		if (patchItr->length > 0)
			memcpy(newPatch->origData.data() + existingOffset, patchItr->origData.data(), patchItr->length);

		delete patch;
		patch = newPatch;
		delete patchItr;
		// remove currently iterated patch
		debuggerState.patches.erase(debuggerState.patches.begin() + i);
		i--;
	}
	debuggerState.patches.push_back(patch);
	// apply patch (if breakpoints exist then update those instead of actual data)
	if ((address & 3) != 0)
		cemu_assert_debug(false);
	if ((patchData.size() & 3) != 0)
		cemu_assert_debug(false);
	for (uint32 i = 0; i < wordCount; i++)
	{
		const uint32 wordAddress = address + i * 4;
		bool hasActiveExecuteBP = false;
		for (DebuggerBreakpoint* bpItr = debugger_getFirstBP(wordAddress); bpItr; bpItr = bpItr->next)
		{
			if (!bpItr->isExecuteBP())
				continue;
			bpItr->originalOpcodeValue = *(uint32be*)(patchData.data() + i * 4);
			if (bpItr->enabled)
				hasActiveExecuteBP = true;
		}
		if (hasActiveExecuteBP == false)
		{
			memcpy(memory_getPointerFromVirtualOffset(wordAddress), patchData.data() + i * 4, 4);
			PPCRecompiler_invalidateRange(wordAddress, wordAddress + 4);
		}
	}
}
|
||||
|
||||
// Returns true if the 4-byte word starting at address overlaps the range of any registered patch.
bool debugger_hasPatch(uint32 address)
{
	for (const DebuggerPatch* entry : debuggerState.patches)
	{
		const bool wordEndsAfterPatchStart = (address + 4 > entry->address);
		const bool wordStartsBeforePatchEnd = (address < entry->address + entry->length);
		if (wordEndsAfterPatchStart && wordStartsBeforePatchEnd)
			return true;
	}
	return false;
}
|
||||
|
||||
// Executes exactly one instruction on hCPU via the interpreter.
// The recompiler is disabled for the duration of the step and the breakpoint at the current
// instruction is lifted while the instruction runs, then re-applied.
// Afterwards the session instruction pointer is updated and, if requested, the debugger window
// is told to follow the new instruction pointer.
void debugger_stepInto(PPCInterpreter_t* hCPU, bool updateDebuggerWindow = true)
{
	bool recompilerWasEnabled = ppcRecompilerEnabled;
	ppcRecompilerEnabled = false;

	auto& session = debuggerState.debugSession;
	const uint32 startIP = session.instructionPointer;
	// temporarily restore the original instruction so it can be executed
	debugger_updateExecutionBreakpoint(startIP, true);
	PPCInterpreterSlim_executeInstruction(hCPU);
	debugger_updateExecutionBreakpoint(startIP);

	session.instructionPointer = hCPU->instructionPointer;
	if (updateDebuggerWindow)
		debuggerWindow_moveIP();
	ppcRecompilerEnabled = recompilerWasEnabled;
}
|
||||
|
||||
// Steps over the instruction at the session instruction pointer.
// - If the instruction is not BL or BCTRL, a plain step-into is performed and false is returned.
// - Otherwise a one-shot breakpoint is placed on the following instruction, the current instruction
//   is stepped, and true is returned to tell the caller to continue execution.
bool debugger_stepOver(PPCInterpreter_t* hCPU)
{
	bool recompilerWasEnabled = ppcRecompilerEnabled;
	ppcRecompilerEnabled = false;
	// disassemble current instruction (with the breakpoint lifted so the real opcode is read)
	PPCDisassembledInstruction disasmInstr = { 0 };
	const uint32 startIP = debuggerState.debugSession.instructionPointer;
	debugger_updateExecutionBreakpoint(startIP, true);
	ppcAssembler_disassemble(startIP, memory_readU32(startIP), &disasmInstr);

	const bool isCallInstruction =
		(disasmInstr.ppcAsmCode == PPCASM_OP_BL) || (disasmInstr.ppcAsmCode == PPCASM_OP_BCTRL);
	if (!isCallInstruction)
	{
		// nothing to skip, use step-into
		debugger_stepInto(hCPU);
		debugger_updateExecutionBreakpoint(startIP);
		debuggerWindow_moveIP();
		ppcRecompilerEnabled = recompilerWasEnabled;
		return false;
	}
	// create one-shot breakpoint at next instruction
	debugger_createSingleShotExecuteBreakpoint(startIP + 4);
	// step over current instruction (to avoid breakpoint)
	debugger_stepInto(hCPU);
	debuggerWindow_moveIP();
	// restore breakpoints
	debugger_updateExecutionBreakpoint(startIP);
	// run
	ppcRecompilerEnabled = recompilerWasEnabled;
	return true;
}
|
||||
|
||||
// Copies the general purpose registers, floating point registers, LR and the 32 CR entries
// of hCPU into the debug session's snapshot.
void debugger_createPPCStateSnapshot(PPCInterpreter_t* hCPU)
{
	auto& snapshot = debuggerState.debugSession.ppcSnapshot;
	// the snapshot arrays hold 32 entries each, so their size equals the amount to copy
	memcpy(snapshot.gpr, hCPU->gpr, sizeof(snapshot.gpr));
	memcpy(snapshot.fpr, hCPU->fpr, sizeof(snapshot.fpr));
	snapshot.spr_lr = hCPU->spr.LR;
	for (uint32 crIndex = 0; crIndex < 32; crIndex++)
		snapshot.cr[crIndex] = hCPU->cr[crIndex];
}
|
||||
|
||||
// Entered when hCPU hits a debugger trap. Marks the session as trapped, publishes the CPU state
// and then polls the step control flags (stepOver / stepInto / run) every millisecond until
// execution is to be continued. On exit the trapped state is cleared and the UI is notified.
void debugger_enterTW(PPCInterpreter_t* hCPU)
{
	auto& session = debuggerState.debugSession;
	session.isTrapped = true;
	session.debuggedThreadMPTR = coreinitThread_getCurrentThreadMPTRDepr(hCPU);
	session.instructionPointer = hCPU->instructionPointer;
	session.hCPU = hCPU;
	debugger_createPPCStateSnapshot(hCPU);
	// remove one-shot breakpoint if it exists
	if (DebuggerBreakpoint* oneShotBP = debugger_getFirstBP(session.instructionPointer, DEBUGGER_BP_T_ONE_SHOT))
		debugger_deleteBreakpoint(oneShotBP);
	debuggerWindow_notifyDebugBreakpointHit2();
	debuggerWindow_updateViewThreadsafe2();
	// reset step control
	session.stepInto = false;
	session.stepOver = false;
	session.run = false;
	for (;;)
	{
		std::this_thread::sleep_for(std::chrono::milliseconds(1));
		// check for step commands
		if (session.stepOver)
		{
			const bool continueExecution = debugger_stepOver(hCPU);
			debugger_createPPCStateSnapshot(hCPU);
			if (continueExecution)
				break; // if true is returned, continue with execution
			debuggerWindow_updateViewThreadsafe2();
			session.stepOver = false;
		}
		if (session.stepInto)
		{
			debugger_stepInto(hCPU);
			debugger_createPPCStateSnapshot(hCPU);
			debuggerWindow_updateViewThreadsafe2();
			session.stepInto = false;
			continue;
		}
		if (session.run)
		{
			debugger_createPPCStateSnapshot(hCPU);
			debugger_stepInto(hCPU, false);
			PPCInterpreterSlim_executeInstruction(hCPU);
			session.instructionPointer = hCPU->instructionPointer;
			session.run = false;
			break;
		}
	}

	session.isTrapped = false;
	session.hCPU = nullptr;
	debuggerWindow_updateViewThreadsafe2();
	debuggerWindow_notifyRun();
}
|
||||
|
||||
// Consumes a pending break request once hCPU executes outside of the emulator trampoline area.
// The flag is cleared and assert_dbg() is invoked; creating the actual breakpoint is currently
// commented out.
void debugger_shouldBreak(PPCInterpreter_t* hCPU)
{
	if (!debuggerState.debugSession.shouldBreak)
		return;
	// exclude emulator trampoline area
	const bool insideTrampolineArea =
		hCPU->instructionPointer >= MEMORY_CODE_TRAMPOLINE_AREA_ADDR &&
		hCPU->instructionPointer <= MEMORY_CODE_TRAMPOLINE_AREA_ADDR + MEMORY_CODE_TRAMPOLINE_AREA_SIZE;
	if (insideTrampolineArea)
		return;

	debuggerState.debugSession.shouldBreak = false;
	assert_dbg();
	//debugger_createBreakpoint((uint32)hCPU->instructionPointer, DEBUGGER_BP_TYPE_ONE_SHOT);
}
|
||||
|
||||
void debugger_addParserSymbols(class ExpressionParser& ep)
|
||||
{
|
||||
for (sint32 i = 0; i < 32; i++)
|
||||
ep.AddConstant(fmt::format("r{}", i), debuggerState.debugSession.ppcSnapshot.gpr[i]);
|
||||
}
|
125
src/Cafe/HW/Espresso/Debugger/Debugger.h
Normal file
125
src/Cafe/HW/Espresso/Debugger/Debugger.h
Normal file
|
@ -0,0 +1,125 @@
|
|||
#pragma once
|
||||
|
||||
#include <set>
|
||||
#include "Cafe/HW/Espresso/PPCState.h"
|
||||
|
||||
//#define DEBUGGER_BP_TYPE_NORMAL (1<<0) // normal breakpoint
|
||||
//#define DEBUGGER_BP_TYPE_ONE_SHOT (1<<1) // normal breakpoint
|
||||
//#define DEBUGGER_BP_TYPE_MEMORY_READ (1<<2) // memory breakpoint
|
||||
//#define DEBUGGER_BP_TYPE_MEMORY_WRITE (1<<3) // memory breakpoint
|
||||
|
||||
#define DEBUGGER_BP_T_NORMAL 0 // normal breakpoint
|
||||
#define DEBUGGER_BP_T_ONE_SHOT 1 // normal breakpoint, deletes itself after trigger (used for stepping)
|
||||
#define DEBUGGER_BP_T_MEMORY_READ 2 // memory breakpoint
|
||||
#define DEBUGGER_BP_T_MEMORY_WRITE 3 // memory breakpoint
|
||||
|
||||
#define DEBUGGER_BP_T_GDBSTUB 1 // breakpoint created by GDBStub
|
||||
#define DEBUGGER_BP_T_DEBUGGER 2 // breakpoint created by Cemu's debugger
|
||||
|
||||
|
||||
struct DebuggerBreakpoint
|
||||
{
|
||||
uint32 address;
|
||||
uint32 originalOpcodeValue;
|
||||
mutable uint8 bpType;
|
||||
mutable bool enabled;
|
||||
mutable std::wstring comment;
|
||||
mutable uint8 dbType = DEBUGGER_BP_T_DEBUGGER;
|
||||
|
||||
DebuggerBreakpoint(uint32 address, uint32 originalOpcode, uint8 bpType = 0, bool enabled = true, std::wstring comment = std::wstring())
|
||||
:address(address), originalOpcodeValue(originalOpcode), bpType(bpType), enabled(enabled), comment(std::move(comment))
|
||||
{
|
||||
next = nullptr;
|
||||
}
|
||||
|
||||
|
||||
bool operator<(const DebuggerBreakpoint& rhs) const
|
||||
{
|
||||
return address < rhs.address;
|
||||
}
|
||||
bool operator==(const DebuggerBreakpoint& rhs) const
|
||||
{
|
||||
return address == rhs.address;
|
||||
}
|
||||
|
||||
bool isExecuteBP() const
|
||||
{
|
||||
return bpType == DEBUGGER_BP_T_NORMAL || bpType == DEBUGGER_BP_T_ONE_SHOT;
|
||||
}
|
||||
|
||||
bool isMemBP() const
|
||||
{
|
||||
return bpType == DEBUGGER_BP_T_MEMORY_READ || bpType == DEBUGGER_BP_T_MEMORY_WRITE;
|
||||
}
|
||||
|
||||
DebuggerBreakpoint* next;
|
||||
};
|
||||
|
||||
struct DebuggerPatch
|
||||
{
|
||||
uint32 address;
|
||||
sint32 length;
|
||||
std::vector<uint8> data;
|
||||
std::vector<uint8> origData;
|
||||
};
|
||||
|
||||
struct PPCSnapshot
|
||||
{
|
||||
uint32 gpr[32];
|
||||
FPR_t fpr[32];
|
||||
uint8 cr[32];
|
||||
uint32 spr_lr;
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
bool breakOnEntry;
|
||||
// breakpoints
|
||||
std::vector<DebuggerBreakpoint*> breakpoints;
|
||||
std::vector<DebuggerPatch*> patches;
|
||||
DebuggerBreakpoint* activeMemoryBreakpoint;
|
||||
// debugging state
|
||||
struct
|
||||
{
|
||||
volatile bool shouldBreak; // debugger window requests a break asap
|
||||
volatile bool isTrapped; // if set, breakpoint is active and stepping through the code is possible
|
||||
uint32 debuggedThreadMPTR;
|
||||
volatile uint32 instructionPointer;
|
||||
PPCInterpreter_t* hCPU;
|
||||
// step control
|
||||
volatile bool stepOver;
|
||||
volatile bool stepInto;
|
||||
volatile bool run;
|
||||
// snapshot of PPC state
|
||||
PPCSnapshot ppcSnapshot;
|
||||
}debugSession;
|
||||
|
||||
}debuggerState_t;
|
||||
|
||||
extern debuggerState_t debuggerState;
|
||||
|
||||
// new API
|
||||
DebuggerBreakpoint* debugger_getFirstBP(uint32 address);
|
||||
void debugger_toggleExecuteBreakpoint(uint32 address); // create/remove execute breakpoint
|
||||
void debugger_createExecuteBreakpoint(uint32 address);
|
||||
void debugger_toggleBreakpoint(uint32 address, bool state, DebuggerBreakpoint* bp);
|
||||
|
||||
void debugger_createMemoryBreakpoint(uint32 address, bool onRead, bool onWrite);
|
||||
|
||||
void debugger_handleEntryBreakpoint(uint32 address);
|
||||
|
||||
void debugger_deleteBreakpoint(DebuggerBreakpoint* bp);
|
||||
|
||||
void debugger_updateExecutionBreakpoint(uint32 address, bool forceRestore = false);
|
||||
|
||||
void debugger_createPatch(uint32 address, std::span<uint8> patchData);
|
||||
bool debugger_hasPatch(uint32 address);
|
||||
|
||||
void debugger_forceBreak(); // force breakpoint at the next possible instruction
|
||||
bool debugger_isTrapped();
|
||||
void debugger_resume();
|
||||
|
||||
void debugger_enterTW(PPCInterpreter_t* hCPU);
|
||||
void debugger_shouldBreak(PPCInterpreter_t* hCPU);
|
||||
|
||||
void debugger_addParserSymbols(class ExpressionParser& ep);
|
197
src/Cafe/HW/Espresso/EspressoISA.h
Normal file
197
src/Cafe/HW/Espresso/EspressoISA.h
Normal file
|
@ -0,0 +1,197 @@
|
|||
#pragma once
|
||||
|
||||
namespace Espresso
|
||||
{
|
||||
enum CR_BIT
|
||||
{
|
||||
CR_BIT_INDEX_LT = 0,
|
||||
CR_BIT_INDEX_GT = 1,
|
||||
CR_BIT_INDEX_EQ = 2,
|
||||
CR_BIT_INDEX_SO = 3,
|
||||
};
|
||||
|
||||
enum class PrimaryOpcode
|
||||
{
|
||||
// underscore at the end of the name means that this instruction always updates CR0 (as if RC bit is set)
|
||||
ZERO = 0,
|
||||
VIRTUAL_HLE = 1,
|
||||
|
||||
// 3 = TWI
|
||||
GROUP_4 = 4,
|
||||
MULLI = 7,
|
||||
SUBFIC = 8,
|
||||
CMPLI = 10,
|
||||
CMPI = 11,
|
||||
ADDIC = 12,
|
||||
ADDIC_ = 13,
|
||||
ADDI = 14,
|
||||
ADDIS = 15,
|
||||
BC = 16, // conditional branch
|
||||
GROUP_17 = 17, // SC
|
||||
B = 18, // unconditional branch
|
||||
GROUP_19 = 19,
|
||||
RLWIMI = 20,
|
||||
RLWINM = 21,
|
||||
// 22 ?
|
||||
RLWNM = 23,
|
||||
ORI = 24,
|
||||
ORIS = 25,
|
||||
XORI = 26,
|
||||
XORIS = 27,
|
||||
ANDI_ = 28,
|
||||
ANDIS_ = 29,
|
||||
GROUP_31 = 31,
|
||||
LWZ = 32,
|
||||
LWZU = 33,
|
||||
LBZ = 34,
|
||||
LBZU = 35,
|
||||
STW = 36,
|
||||
STWU = 37,
|
||||
STB = 38,
|
||||
STBU = 39,
|
||||
LHZ = 40,
|
||||
LHZU = 41,
|
||||
LHA = 42,
|
||||
LHAU = 43,
|
||||
STH = 44,
|
||||
STHU = 45,
|
||||
LMW = 46,
|
||||
STMW = 47,
|
||||
LFS = 48,
|
||||
LFSU = 49,
|
||||
LFD = 50,
|
||||
LFDU = 51,
|
||||
STFS = 52,
|
||||
STFSU = 53,
|
||||
STFD = 54,
|
||||
STFDU = 55,
|
||||
PSQ_L = 56,
|
||||
PSQ_LU = 57,
|
||||
// 58 ?
|
||||
GROUP_59 = 59,
|
||||
PSQ_ST = 60,
|
||||
PSQ_STU = 61,
|
||||
// 62 ?
|
||||
GROUP_63 = 63,
|
||||
};
|
||||
|
||||
enum class Opcode19
|
||||
{
|
||||
MCRF = 0,
|
||||
BCLR = 16,
|
||||
CRNOR = 33,
|
||||
RFI = 50,
|
||||
CRANDC = 129,
|
||||
ISYNC = 150,
|
||||
CRXOR = 193,
|
||||
CRAND = 257,
|
||||
CREQV = 289,
|
||||
CRORC = 417,
|
||||
CROR = 449,
|
||||
BCCTR = 528
|
||||
};
|
||||
|
||||
enum class OPCODE_31
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
inline PrimaryOpcode GetPrimaryOpcode(uint32 opcode) { return (PrimaryOpcode)(opcode >> 26); };
|
||||
inline Opcode19 GetGroup19Opcode(uint32 opcode) { return (Opcode19)((opcode >> 1) & 0x3FF); };
|
||||
|
||||
struct BOField
|
||||
{
|
||||
BOField() {};
|
||||
BOField(uint8 bo) : bo(bo) {};
|
||||
|
||||
bool conditionInverted() const
|
||||
{
|
||||
return (bo & 8) == 0;
|
||||
}
|
||||
|
||||
bool decrementerIgnore() const
|
||||
{
|
||||
return (bo & 4) != 0;
|
||||
}
|
||||
|
||||
bool decrementerMustBeZero() const
|
||||
{
|
||||
return (bo & 2) != 0;
|
||||
}
|
||||
|
||||
bool conditionIgnore() const
|
||||
{
|
||||
return (bo & 16) != 0;
|
||||
}
|
||||
|
||||
bool branchAlways()
|
||||
{
|
||||
return conditionIgnore() && decrementerIgnore();
|
||||
}
|
||||
|
||||
uint8 bo;
|
||||
};
|
||||
|
||||
// Decodes an I-form instruction.
// LI receives the displacement (opcode bits masked by 0x3fffffc) sign-extended from bit 25,
// AA is bit 1 and LK is bit 0 of the opcode.
inline void _decodeForm_I(uint32 opcode, uint32& LI, bool& AA, bool& LK)
{
	uint32 displacement = opcode & 0x3fffffc;
	const bool isNegative = (displacement & 0x02000000) != 0;
	if (isNegative)
		displacement |= 0xfc000000; // sign-extend to 32 bits
	LI = displacement;
	AA = (opcode & 2) != 0;
	LK = (opcode & 1) != 0;
}
|
||||
|
||||
// Decodes the fields of a conditional branch with a 16-bit displacement.
// BD receives the displacement (opcode bits masked by 0xfffc) sign-extended from bit 15,
// BO is bits 21..25, BI is bits 16..20, AA is bit 1 and LK is bit 0 of the opcode.
inline void _decodeForm_D_branch(uint32 opcode, uint32& BD, BOField& BO, uint32& BI, bool& AA, bool& LK)
{
	uint32 displacement = opcode & 0xfffc;
	const bool isNegative = (displacement & 0x8000) != 0;
	if (isNegative)
		displacement |= 0xffff0000; // sign-extend to 32 bits
	BD = displacement;
	BO = BOField((uint8)((opcode >> 21) & 0x1F));
	BI = (opcode >> 16) & 0x1F;
	AA = (opcode & 2) != 0;
	LK = (opcode & 1) != 0;
}
|
||||
|
||||
// Decodes a D-form instruction with a signed immediate.
// rD is bits 21..25, rA is bits 16..20 and imm is the low 16 bits sign-extended to 32 bits.
inline void _decodeForm_D_SImm(uint32 opcode, uint32& rD, uint32& rA, uint32& imm)
{
	const sint16 signedImm16 = (sint16)(opcode & 0xFFFF);
	const sint32 signedImm32 = (sint32)signedImm16;
	rD = (opcode >> 21) & 0x1F;
	rA = (opcode >> 16) & 0x1F;
	imm = (uint32)signedImm32;
}
|
||||
|
||||
// Decodes the branch fields of an XL-form instruction.
// BO is bits 21..25, BI is bits 16..20 and LK is bit 0 of the opcode.
inline void _decodeForm_XL(uint32 opcode, BOField& BO, uint32& BI, bool& LK)
{
	const uint8 boBits = (uint8)((opcode >> 21) & 0x1F);
	BO = BOField(boBits);
	BI = (opcode >> 16) & 0x1F;
	LK = (opcode & 1) != 0;
}
|
||||
|
||||
inline void decodeOp_ADDI(uint32 opcode, uint32& rD, uint32& rA, uint32& imm)
|
||||
{
|
||||
_decodeForm_D_SImm(opcode, rD, rA, imm);
|
||||
}
|
||||
|
||||
inline void decodeOp_B(uint32 opcode, uint32& LI, bool& AA, bool& LK)
|
||||
{
|
||||
// form I
|
||||
_decodeForm_I(opcode, LI, AA, LK);
|
||||
}
|
||||
|
||||
inline void decodeOp_BC(uint32 opcode, uint32& BD, BOField& BO, uint32& BI, bool& AA, bool& LK)
|
||||
{
|
||||
// form D
|
||||
_decodeForm_D_branch(opcode, BD, BO, BI, AA, LK);
|
||||
}
|
||||
|
||||
inline void decodeOp_BCLR(uint32 opcode, BOField& BO, uint32& BI, bool& LK)
|
||||
{
|
||||
// form XL (with BD field expected to be zero)
|
||||
_decodeForm_XL(opcode, BO, BI, LK);
|
||||
}
|
||||
|
||||
inline void decodeOp_BCCTR(uint32 opcode, BOField& BO, uint32& BI, bool& LK)
|
||||
{
|
||||
// form XL (with BD field expected to be zero)
|
||||
_decodeForm_XL(opcode, BO, BI, LK);
|
||||
}
|
||||
}
|
978
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp
Normal file
978
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp
Normal file
|
@ -0,0 +1,978 @@
|
|||
|
||||
// Update the overflow flags in XER.
// On overflow both SO and OV are set; otherwise only OV is cleared and SO is left untouched.
static void PPCInterpreter_setXerOV(PPCInterpreter_t* hCPU, bool hasOverflow)
{
	if (!hasOverflow)
	{
		hCPU->spr.XER &= ~XER_OV;
		return;
	}
	hCPU->spr.XER |= XER_SO;
	hCPU->spr.XER |= XER_OV;
}
|
||||
|
||||
// Signed-overflow test for r = x + y (optionally plus a carry-in).
// Only the sign bits matter: overflow happened when the result sign differs from the sign of both operands.
static bool checkAdditionOverflow(uint32 x, uint32 y, uint32 r)
{
	// todo - update remaining *O instructions to use this function

	/*
		sign(x) sign(y) sign(r)   overflow
		   0       0       0         0
		   1       0       0         0
		   0       1       0         0
		   1       1       0         1
		   0       0       1         1
		   1       0       1         0
		   0       1       1         0
		   1       1       1         0
	*/
	const uint32 differsFromX = x ^ r;
	const uint32 differsFromY = y ^ r;
	return ((differsFromX & differsFromY) & 0x80000000) != 0;
}
|
||||
|
||||
// add[.]: gpr[rD] = gpr[rA] + gpr[rB] with 32-bit wrap-around; CR0 is updated when opHasRC() reports the record bit.
static void PPCInterpreter_ADD(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	// Add as unsigned: the previous (int) + (int) form invoked signed-overflow undefined
	// behaviour whenever the sum left the sint32 range. Unsigned addition wraps modulo 2^32.
	hCPU->gpr[rD] = hCPU->gpr[rA] + hCPU->gpr[rB];
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// addo[.]: gpr[rD] = gpr[rA] + gpr[rB], additionally recording signed overflow in XER[OV] / XER[SO].
static void PPCInterpreter_ADDO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	// untested (Don't Starve Giant Edition uses this instruction + BSO)
	PPC_OPC_TEMPL3_XO();
	// read both operands before writing rD, since rD may be the same register as rA or rB
	const uint32 a = hCPU->gpr[rA];
	const uint32 b = hCPU->gpr[rB];
	const uint32 result = a + b;
	hCPU->gpr[rD] = result;
	// OV reflects *signed* overflow. The previous check (64-bit sum >= 2^32) detected the
	// unsigned carry-out instead, e.g. it flagged -1 + 1 and missed 0x7FFFFFFF + 1.
	PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(a, b, result));
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// addc[.]: gpr[rD] = gpr[rA] + gpr[rB]; xer_ca receives the unsigned carry-out.
static void PPCInterpreter_ADDC(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 lhs = hCPU->gpr[rA];
	const uint32 sum = lhs + hCPU->gpr[rB];
	hCPU->gpr[rD] = sum;
	// the addition wrapped if the sum is smaller than an operand
	hCPU->xer_ca = (sum < lhs) ? 1 : 0;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// addco[.]: gpr[rD] = gpr[rA] + gpr[rB]; xer_ca receives the unsigned carry-out and
// XER[OV] / XER[SO] record signed overflow.
static void PPCInterpreter_ADDCO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 a = hCPU->gpr[rA];
	const uint32 b = hCPU->gpr[rB];
	const uint32 result = a + b;
	hCPU->gpr[rD] = result;
	// carry: the unsigned addition wrapped around
	hCPU->xer_ca = (result < a) ? 1 : 0;
	// set SO/OV: previously this reused the carry condition, which is not the same thing as
	// signed overflow (e.g. -1 + 1 carries but does not overflow)
	PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(a, b, result));
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// adde[.]: gpr[rD] = gpr[rA] + gpr[rB] + xer_ca; xer_ca is replaced by the new carry-out.
static void PPCInterpreter_ADDE(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 lhs = hCPU->gpr[rA];
	const uint32 rhs = hCPU->gpr[rB];
	const uint32 carryIn = hCPU->xer_ca;
	hCPU->gpr[rD] = lhs + rhs + carryIn;
	// update xer
	hCPU->xer_ca = ppc_carry_3(lhs, rhs, carryIn) ? 1 : 0;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// addeo[.]: like adde, but also records signed overflow in XER[OV] / XER[SO].
static void PPCInterpreter_ADDEO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	// used by DS Virtual Console (Super Mario 64 DS)
	PPC_OPC_TEMPL3_XO();
	const uint32 lhs = hCPU->gpr[rA];
	const uint32 rhs = hCPU->gpr[rB];
	const uint32 carryIn = hCPU->xer_ca;
	const uint32 sum = lhs + rhs + carryIn;
	hCPU->gpr[rD] = sum;
	// update xer carry
	hCPU->xer_ca = ppc_carry_3(lhs, rhs, carryIn) ? 1 : 0;
	// update xer SO/OV
	PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(lhs, rhs, sum));
	// update CR
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// addi: gpr[rD] = (rA != 0 ? gpr[rA] : 0) + imm, with 32-bit wrap-around.
// Register index 0 in the rA slot means the literal value 0, not gpr[0].
static void PPCInterpreter_ADDI(PPCInterpreter_t* hCPU, uint32 opcode)
{
	sint32 rD, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
	// Unsigned addition (same form as ADDIS): the previous (int) + (int) expression was
	// undefined behaviour when the sum overflowed sint32; the wrapped result is identical.
	hCPU->gpr[rD] = (rA ? hCPU->gpr[rA] : 0) + imm;
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// addic: gpr[rD] = gpr[rA] + imm; xer_ca receives the unsigned carry-out. CR0 is not touched.
static void PPCInterpreter_ADDIC(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rD, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
	const uint32 base = hCPU->gpr[rA];
	const uint32 sum = base + imm;
	hCPU->gpr[rD] = sum;
	// update XER
	hCPU->xer_ca = (sum < base) ? 1 : 0;
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// addic.: gpr[rD] = gpr[rA] + imm; xer_ca receives the unsigned carry-out and CR0 is always updated.
static void PPCInterpreter_ADDIC_(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rD, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
	const uint32 base = hCPU->gpr[rA];
	const uint32 sum = base + imm;
	hCPU->gpr[rD] = sum;
	// update XER
	hCPU->xer_ca = (sum < base) ? 1 : 0;
	// update cr0 flags
	ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// addis: gpr[rD] = (rA != 0 ? gpr[rA] : 0) + imm, where imm comes from the Shift16 template.
static void PPCInterpreter_ADDIS(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rD, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_Shift16(opcode, rD, rA, imm);
	// register index 0 selects the constant 0 instead of gpr[0]
	uint32 base = 0;
	if (rA)
		base = hCPU->gpr[rA];
	hCPU->gpr[rD] = base + imm;
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// addze[.]: gpr[rD] = gpr[rA] + xer_ca; xer_ca receives the new carry-out.
static void PPCInterpreter_ADDZE(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	PPC_ASSERT(rB == 0);
	const uint32 value = hCPU->gpr[rA];
	const uint32 carryIn = hCPU->xer_ca;
	hCPU->gpr[rD] = value + carryIn;
	// the only way to carry out is adding a non-zero carry to an all-ones value
	const bool carryOut = (value == 0xffffffff) && carryIn;
	hCPU->xer_ca = carryOut ? 1 : 0;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// addme[.]: gpr[rD] = gpr[rA] + xer_ca + 0xffffffff (i.e. minus one); xer_ca receives the carry-out.
static void PPCInterpreter_ADDME(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	PPC_ASSERT(rB == 0);
	const uint32 value = hCPU->gpr[rA];
	const uint32 carryIn = hCPU->xer_ca;
	hCPU->gpr[rD] = value + carryIn + 0xffffffff;
	// adding 0xffffffff carries out unless both other terms are zero
	const bool carryOut = (value != 0) || (carryIn != 0);
	hCPU->xer_ca = carryOut ? 1 : 0;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// subf[.]: gpr[rD] = gpr[rB] - gpr[rA], computed as ~gpr[rA] + gpr[rB] + 1.
static void PPCInterpreter_SUBF(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 invertedA = ~hCPU->gpr[rA];
	const uint32 b = hCPU->gpr[rB];
	hCPU->gpr[rD] = invertedA + b + 1;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// subfo[.]: gpr[rD] = ~gpr[rA] + gpr[rB] + 1 (gpr[rB] - gpr[rA]), recording signed overflow in XER[OV] / XER[SO].
static void PPCInterpreter_SUBFO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	// untested (Don't Starve Giant Edition uses this)
	// also used by DS Virtual Console (Super Mario 64 DS)
	PPC_OPC_TEMPL3_XO();
	// Latch the operands before writing the result: rD may be the same register as rA or rB,
	// and the overflow check previously re-read gpr[rA]/gpr[rB] after gpr[rD] had been overwritten.
	const uint32 invertedA = ~hCPU->gpr[rA];
	const uint32 b = hCPU->gpr[rB];
	const uint32 result = invertedA + b + 1;
	hCPU->gpr[rD] = result;
	PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(invertedA, b, result));
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// subfc[.]: gpr[rD] = ~gpr[rA] + gpr[rB] + 1; xer_ca receives the carry-out of that three-term sum.
static void PPCInterpreter_SUBFC(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 invertedA = ~hCPU->gpr[rA];
	const uint32 rhs = hCPU->gpr[rB];
	hCPU->gpr[rD] = invertedA + rhs + 1;
	// update xer
	hCPU->xer_ca = ppc_carry_3(invertedA, rhs, 1) ? 1 : 0;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// subfco[.]: like subfc, but also records signed overflow in XER[OV] / XER[SO].
static void PPCInterpreter_SUBFCO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	// used by DS Virtual Console (Super Mario 64 DS)
	PPC_OPC_TEMPL3_XO();
	const uint32 invertedA = ~hCPU->gpr[rA];
	const uint32 rhs = hCPU->gpr[rB];
	const uint32 difference = invertedA + rhs + 1;
	hCPU->gpr[rD] = difference;
	// update xer carry
	hCPU->xer_ca = ppc_carry_3(invertedA, rhs, 1) ? 1 : 0;
	// update xer SO/OV
	if (!checkAdditionOverflow(invertedA, rhs, difference))
	{
		hCPU->spr.XER &= ~XER_OV;
	}
	else
	{
		hCPU->spr.XER |= XER_SO;
		hCPU->spr.XER |= XER_OV;
	}
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// subfic: gpr[rD] = ~gpr[rA] + imm + 1 (imm - gpr[rA]); xer_ca receives the carry-out. CR0 is not touched.
static void PPCInterpreter_SUBFIC(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rD, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
	const uint32 invertedA = ~hCPU->gpr[rA];
	hCPU->gpr[rD] = invertedA + imm + 1;
	hCPU->xer_ca = ppc_carry_3(invertedA, imm, 1) ? 1 : 0;
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// subfe[.]: gpr[rD] = ~gpr[rA] + gpr[rB] + xer_ca; xer_ca receives the new carry-out.
static void PPCInterpreter_SUBFE(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 invertedA = ~hCPU->gpr[rA];
	const uint32 rhs = hCPU->gpr[rB];
	const uint32 carryIn = hCPU->xer_ca;
	hCPU->gpr[rD] = invertedA + rhs + carryIn;
	// update xer carry
	hCPU->xer_ca = ppc_carry_3(invertedA, rhs, carryIn) ? 1 : 0;
	// update cr0
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// subfeo[.]: like subfe, but also records signed overflow in XER[OV] / XER[SO].
static void PPCInterpreter_SUBFEO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 invertedA = ~hCPU->gpr[rA];
	const uint32 rhs = hCPU->gpr[rB];
	const uint32 carryIn = hCPU->xer_ca;
	const uint32 difference = invertedA + rhs + carryIn;
	hCPU->gpr[rD] = difference;
	// update xer carry
	hCPU->xer_ca = ppc_carry_3(invertedA, rhs, carryIn) ? 1 : 0;
	// update xer SO/OV
	if (!checkAdditionOverflow(invertedA, rhs, difference))
	{
		hCPU->spr.XER &= ~XER_OV;
	}
	else
	{
		hCPU->spr.XER |= XER_SO;
		hCPU->spr.XER |= XER_OV;
	}
	// update cr0
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// subfze[.]: gpr[rD] = ~gpr[rA] + xer_ca; xer_ca receives the new carry-out.
static void PPCInterpreter_SUBFZE(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	PPC_ASSERT(rB == 0);
	const uint32 value = hCPU->gpr[rA];
	const uint32 carryIn = hCPU->xer_ca;
	hCPU->gpr[rD] = ~value + carryIn;
	// ~value is all ones only for value == 0; adding a non-zero carry then wraps
	const bool carryOut = (value == 0) && carryIn;
	hCPU->xer_ca = carryOut ? 1 : 0;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// subfme[.]: gpr[rD] = ~gpr[rA] + 0xFFFFFFFF + xer_ca; xer_ca receives the new carry-out.
static void PPCInterpreter_SUBFME(PPCInterpreter_t* hCPU, uint32 opcode)
{
	// untested
	PPC_OPC_TEMPL3_XO();
	PPC_ASSERT(rB == 0);
	const uint32 invertedA = ~hCPU->gpr[rA];
	const uint32 carryIn = hCPU->xer_ca;
	hCPU->gpr[rD] = invertedA + 0xFFFFFFFF + carryIn;
	// update xer carry
	hCPU->xer_ca = ppc_carry_3(invertedA, 0xFFFFFFFF, carryIn) ? 1 : 0;
	// update cr0
	if (opcode & PPC_OPC_RC)
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// mulhw[.]: gpr[rD] = upper 32 bits of the signed 64-bit product gpr[rA] * gpr[rB].
static void PPCInterpreter_MULHW_(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const sint64 lhs = (sint32)hCPU->gpr[rA];
	const sint64 rhs = (sint32)hCPU->gpr[rB];
	const sint64 product = lhs * rhs;
	hCPU->gpr[rD] = (uint32)(((uint64)product) >> 32);
	if (opcode & PPC_OPC_RC)
	{
		// update cr0 flags
		// (non-release builds call assert_dbg() on this path)
#ifndef PUBLIC_RELEASE
		assert_dbg();
#endif
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	}
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// mulhwu[.]: gpr[rD] = upper 32 bits of the unsigned 64-bit product gpr[rA] * gpr[rB].
static void PPCInterpreter_MULHWU_(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint64 product = (uint64)hCPU->gpr[rA] * (uint64)hCPU->gpr[rB];
	hCPU->gpr[rD] = (uint32)(product >> 32);
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// mullw[.]: gpr[rD] = low 32 bits of gpr[rA] * gpr[rB].
static void PPCInterpreter_MULLW(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const sint64 lhs = (sint64)hCPU->gpr[rA];
	const sint64 rhs = (sint64)hCPU->gpr[rB];
	const sint64 product = lhs * rhs;
	// only the low word is kept
	hCPU->gpr[rD] = (uint32)product;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// mullwo[.]: gpr[rD] = low 32 bits of the signed product gpr[rA] * gpr[rB];
// XER[OV] / XER[SO] are set when the full product does not fit into a signed 32-bit value.
static void PPCInterpreter_MULLWO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	// Don't Starve Giant Edition uses this instruction + BSO
	// also used by FullBlast when a save file exists + it uses mfxer to access overflow result
	PPC_OPC_TEMPL3_XO();
	// The operands must be sign-extended for the range check to be meaningful; the previous
	// (sint64)gpr[...] cast zero-extended them, so negative inputs looked like huge positives.
	const sint64 result = (sint64)(sint32)hCPU->gpr[rA] * (sint64)(sint32)hCPU->gpr[rB];
	hCPU->gpr[rD] = (uint32)result;
	// Out of range means below INT32_MIN *or* above INT32_MAX. The previous condition joined
	// the two comparisons with &&, which can never be true, so OV/SO were never set.
	const bool hasOverflow = (result < -0x80000000ll) || (result > 0x7FFFFFFFLL);
	PPCInterpreter_setXerOV(hCPU, hasOverflow);
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// mulli: gpr[rD] = low 32 bits of gpr[rA] * imm (imm as produced by the signed-immediate template).
static void PPCInterpreter_MULLI(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rD, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
	const uint32 multiplicand = hCPU->gpr[rA];
	hCPU->gpr[rD] = multiplicand * imm;
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// divw[.]: gpr[rD] = (sint32)gpr[rA] / (sint32)gpr[rB].
// Division by zero is logged and then treated as a division by one (result = dividend).
static void PPCInterpreter_DIVW(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	sint32 a = hCPU->gpr[rA];
	sint32 b = hCPU->gpr[rB];
	if (b == 0)
	{
		forceLogDebug_printf("Error: Division by zero! [%08X]\n", (uint32)hCPU->instructionPointer);
		b++;
	}
	// Divide in 64 bits: 0x80000000 / -1 is not representable in sint32, which is undefined
	// behaviour in C++ and raises a divide exception on x86 hosts. The 64-bit quotient
	// truncates to 0x80000000 for that input and is unchanged for every other input.
	// NOTE(review): the architecture leaves rD undefined for this case - confirm against hardware if it matters.
	hCPU->gpr[rD] = (uint32)((sint64)a / (sint64)b);
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// divwo[.]: signed division that reports an unrepresentable quotient through XER[OV] / XER[SO].
// When the quotient cannot be represented (divisor 0, or 0x80000000 / -1) rD and CR0 are left
// unchanged and only the overflow flags are updated.
static void PPCInterpreter_DIVWO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const sint32 a = hCPU->gpr[rA];
	const sint32 b = hCPU->gpr[rB];
	// 0x80000000 / -1 used to reach the host division below, which is undefined behaviour in
	// C++ and raises a divide exception on x86 hosts; it is an overflow case just like b == 0
	const bool quotientUnrepresentable = (b == 0) || (hCPU->gpr[rA] == 0x80000000 && b == -1);
	if (quotientUnrepresentable)
	{
		// setXerOV(true) raises SO together with OV (SO is sticky)
		if (opcode & PPC_OPC_OE)
			PPCInterpreter_setXerOV(hCPU, true);
		PPCInterpreter_nextInstruction(hCPU);
		return;
	}
	hCPU->gpr[rD] = a / b;
	// no overflow: clear OV only, SO keeps its previous value
	if (opcode & PPC_OPC_OE)
		PPCInterpreter_setXerOV(hCPU, false);
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// divwu[.]: gpr[rD] = gpr[rA] / gpr[rB] (unsigned).
// A zero divisor skips the instruction entirely: rD and CR0 stay unchanged.
static void PPCInterpreter_DIVWU(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 divisor = hCPU->gpr[rB];
	if (divisor != 0)
	{
		hCPU->gpr[rD] = hCPU->gpr[rA] / divisor;
		if (opHasRC())
			ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	}
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// divwuo[.]: unsigned division that reports division by zero through XER[OV] / XER[SO].
// On a zero divisor rD and CR0 are left unchanged and only the overflow flags are updated.
static void PPCInterpreter_DIVWUO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 dividend = hCPU->gpr[rA];
	const uint32 divisor = hCPU->gpr[rB];
	if (divisor == 0)
	{
		// setXerOV(true) raises SO together with OV; previously only OV was set
		if (opcode & PPC_OPC_OE)
			PPCInterpreter_setXerOV(hCPU, true);
		PPCInterpreter_nextInstruction(hCPU);
		return;
	}
	hCPU->gpr[rD] = dividend / divisor;
	// no overflow: clear OV only, SO keeps its previous value
	if (opcode & PPC_OPC_OE)
		PPCInterpreter_setXerOV(hCPU, false);
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// creqv: CR[crD] = NOT (CR[crA] XOR CR[crB])
static void PPCInterpreter_CREQV(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL_X_CR();
	const auto bitA = ppc_getCRBit(hCPU, crA);
	const auto bitB = ppc_getCRBit(hCPU, crB);
	ppc_setCRBit(hCPU, crD, bitA ^ bitB ^ 1);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// crand: CR[crD] = CR[crA] AND CR[crB]
static void PPCInterpreter_CRAND(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL_X_CR();
	const auto bitA = ppc_getCRBit(hCPU, crA);
	const auto bitB = ppc_getCRBit(hCPU, crB);
	ppc_setCRBit(hCPU, crD, bitA & bitB);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// crandc: CR[crD] = CR[crA] AND NOT CR[crB]
static void PPCInterpreter_CRANDC(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL_X_CR();
	const auto bitA = ppc_getCRBit(hCPU, crA);
	const auto bitB = ppc_getCRBit(hCPU, crB);
	ppc_setCRBit(hCPU, crD, bitA & (bitB ^ 1));
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// cror: CR[crD] = CR[crA] OR CR[crB]
static void PPCInterpreter_CROR(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL_X_CR();
	const auto bitA = ppc_getCRBit(hCPU, crA);
	const auto bitB = ppc_getCRBit(hCPU, crB);
	ppc_setCRBit(hCPU, crD, bitA | bitB);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// crorc: CR[crD] = CR[crA] OR NOT CR[crB]
static void PPCInterpreter_CRORC(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL_X_CR();
	const auto bitA = ppc_getCRBit(hCPU, crA);
	const auto bitB = ppc_getCRBit(hCPU, crB);
	ppc_setCRBit(hCPU, crD, bitA | (bitB ^ 1));
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// crnor: CR[crD] = NOT (CR[crA] OR CR[crB])
static void PPCInterpreter_CRNOR(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL_X_CR();
	const auto bitA = ppc_getCRBit(hCPU, crA);
	const auto bitB = ppc_getCRBit(hCPU, crB);
	ppc_setCRBit(hCPU, crD, (bitA | bitB) ^ 1);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// crxor: CR[crD] = CR[crA] XOR CR[crB]
static void PPCInterpreter_CRXOR(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL_X_CR();
	const auto bitA = ppc_getCRBit(hCPU, crA);
	const auto bitB = ppc_getCRBit(hCPU, crB);
	ppc_setCRBit(hCPU, crD, bitA ^ bitB);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// neg[.]: gpr[rD] = two's complement negation of gpr[rA].
static void PPCInterpreter_NEG(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	PPC_ASSERT(rB == 0);
	// Negate as unsigned (0 - x wraps modulo 2^32). The previous -(sint32)x form was
	// signed-overflow undefined behaviour for x == 0x80000000; all results are unchanged.
	hCPU->gpr[rD] = (uint32)0 - hCPU->gpr[rA];
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// nego[.]: gpr[rD] = two's complement negation of gpr[rA]; XER[OV] / XER[SO] are set when the
// input is 0x80000000, the only value whose negation is not representable.
static void PPCInterpreter_NEGO(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	PPC_ASSERT(rB == 0);
	const uint32 value = hCPU->gpr[rA];
	PPCInterpreter_setXerOV(hCPU, value == 0x80000000);
	// Negate as unsigned (0 - x wraps modulo 2^32). The previous -(sint32)x form was
	// signed-overflow undefined behaviour for exactly the overflow input 0x80000000.
	hCPU->gpr[rD] = (uint32)0 - value;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rD]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// and[.]: gpr[rA] = gpr[rD] & gpr[rB] (the template's rD slot is the source register here)
static void PPCInterpreter_ANDX(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 source = hCPU->gpr[rD];
	const uint32 operand = hCPU->gpr[rB];
	hCPU->gpr[rA] = source & operand;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// andc[.]: gpr[rA] = gpr[rD] & ~gpr[rB] (the template's rD slot is the source register here)
static void PPCInterpreter_ANDCX(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 source = hCPU->gpr[rD];
	const uint32 invertedOperand = ~hCPU->gpr[rB];
	hCPU->gpr[rA] = source & invertedOperand;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// andi.: gpr[rA] = gpr[rS] & imm (imm from the unsigned-immediate template); CR0 is always updated.
static void PPCInterpreter_ANDI_(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rS, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm);
	const uint32 masked = hCPU->gpr[rS] & imm;
	hCPU->gpr[rA] = masked;
	ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// andis.: gpr[rA] = gpr[rS] & imm (imm from the Shift16 template); CR0 is always updated.
static void PPCInterpreter_ANDIS_(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rS, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm);
	const uint32 masked = hCPU->gpr[rS] & imm;
	hCPU->gpr[rA] = masked;
	ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// nand[.]: gpr[rA] = ~(gpr[rD] & gpr[rB]) (the template's rD slot is the source register here)
static void PPCInterpreter_NANDX(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 conjunction = hCPU->gpr[rD] & hCPU->gpr[rB];
	hCPU->gpr[rA] = ~conjunction;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// or[.]: gpr[rA] = gpr[rD] | gpr[rB] (the template's rD slot is the source register here)
static void PPCInterpreter_OR(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 source = hCPU->gpr[rD];
	const uint32 operand = hCPU->gpr[rB];
	hCPU->gpr[rA] = source | operand;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// orc[.]: gpr[rA] = gpr[rD] | ~gpr[rB] (the template's rD slot is the source register here)
static void PPCInterpreter_ORC(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 source = hCPU->gpr[rD];
	const uint32 invertedOperand = ~hCPU->gpr[rB];
	hCPU->gpr[rA] = source | invertedOperand;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ori: gpr[rA] = gpr[rS] | imm (imm from the unsigned-immediate template); no flags are touched.
static void PPCInterpreter_ORI(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rS, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm);
	const uint32 source = hCPU->gpr[rS];
	hCPU->gpr[rA] = source | imm;
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// oris: gpr[rA] = gpr[rS] | imm (imm from the Shift16 template); no flags are touched.
static void PPCInterpreter_ORIS(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rS, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm);
	const uint32 source = hCPU->gpr[rS];
	hCPU->gpr[rA] = source | imm;
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// nor[.]: gpr[rA] = ~(gpr[rD] | gpr[rB]) (the template's rD slot is the source register here)
static void PPCInterpreter_NORX(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 disjunction = hCPU->gpr[rD] | hCPU->gpr[rB];
	hCPU->gpr[rA] = ~disjunction;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// xor[.]: gpr[rA] = gpr[rD] ^ gpr[rB] (the template's rD slot is the source register here)
static void PPCInterpreter_XOR(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 source = hCPU->gpr[rD];
	const uint32 operand = hCPU->gpr[rB];
	hCPU->gpr[rA] = source ^ operand;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// xori: gpr[rA] = gpr[rS] ^ imm (imm from the unsigned-immediate template); no flags are touched.
static void PPCInterpreter_XORI(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rS, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm);
	const uint32 source = hCPU->gpr[rS];
	hCPU->gpr[rA] = source ^ imm;
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// xoris: gpr[rA] = gpr[rS] ^ imm (imm from the Shift16 template); no flags are touched.
static void PPCInterpreter_XORIS(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rS, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm);
	const uint32 source = hCPU->gpr[rS];
	hCPU->gpr[rA] = source ^ imm;
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// eqv[.]: gpr[rA] = ~(gpr[rD] ^ gpr[rB]) (the template's rD slot is the source register here)
static void PPCInterpreter_EQV(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 difference = hCPU->gpr[rD] ^ hCPU->gpr[rB];
	hCPU->gpr[rA] = ~difference;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// rlwimi[.]: rotate gpr[rS] left by SH and insert the bits selected by mask(MB, ME) into gpr[rA];
// bits of gpr[rA] outside the mask are preserved.
static void PPCInterpreter_RLWIMI(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rS, rA, SH, MB, ME;
	PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME);
	const uint32 rotated = ppc_word_rotl(hCPU->gpr[rS], SH);
	const uint32 insertMask = ppc_mask(MB, ME);
	const uint32 preserved = hCPU->gpr[rA] & ~insertMask;
	hCPU->gpr[rA] = (rotated & insertMask) | preserved;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// rlwinm[.]: gpr[rA] = rotl(gpr[rS], SH) & mask(MB, ME)
static void PPCInterpreter_RLWINM(PPCInterpreter_t* hCPU, uint32 opcode)
{
	sint32 rS, rA, SH, MB, ME;
	PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME);
	const uint32 rotated = ppc_word_rotl(hCPU->gpr[rS], SH);
	const uint32 keepMask = ppc_mask(MB, ME);
	hCPU->gpr[rA] = rotated & keepMask;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// rlwnm[.]: gpr[rA] = rotl(gpr[rS], gpr[rB]) & mask(MB, ME)
// The full value of gpr[rB] is handed to ppc_word_rotl as the rotate amount.
static void PPCInterpreter_RLWNM(PPCInterpreter_t* hCPU, uint32 opcode)
{
	int rS, rA, rB, MB, ME;
	PPC_OPC_TEMPL_M(opcode, rS, rA, rB, MB, ME);
	const uint32 rotated = ppc_word_rotl(hCPU->gpr[rS], hCPU->gpr[rB]);
	const uint32 keepMask = ppc_mask(MB, ME);
	hCPU->gpr[rA] = rotated & keepMask;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// slw[.]: gpr[rA] = gpr[rD] << (gpr[rB] & 0x3f); shift amounts of 32..63 produce 0.
static void PPCInterpreter_SLWX(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 shiftAmount = hCPU->gpr[rB] & 0x3f;
	// shifting a 32-bit value by >= 32 must be handled explicitly
	hCPU->gpr[rA] = (shiftAmount > 31) ? 0 : (hCPU->gpr[rD] << shiftAmount);
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// sraw[.]: arithmetic right shift of gpr[rD] by (gpr[rB] & 0x3f), result in gpr[rA].
// xer_ca is set when the input is negative and at least one 1-bit is shifted out.
static void PPCInterpreter_SRAW(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 shiftAmount = hCPU->gpr[rB] & 0x3f;
	const uint32 value = hCPU->gpr[rD];
	const uint32 signBit = (value >> 31) & 1;
	if (shiftAmount <= 31)
	{
		// ca is set when input is negative and non-zero bits are dropped by shift operation
		const uint32 droppedBits = value & ~(0xFFFFFFFF << shiftAmount);
		hCPU->xer_ca = (uint8)(signBit & (droppedBits != 0 ? 1 : 0));
		hCPU->gpr[rA] = (uint32)((sint32)value >> shiftAmount);
	}
	else
	{
		hCPU->xer_ca = signBit; // copy sign bit to ca
		hCPU->gpr[rA] = (uint32)((sint32)value >> 31); // fill all bits with sign bit
	}
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// srw[.]: gpr[rA] = gpr[rD] >> (gpr[rB] & 0x3f) (logical); shift amounts of 32..63 produce 0.
static void PPCInterpreter_SRWX(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	const uint32 shiftAmount = hCPU->gpr[rB] & 0x3f;
	// shifting a 32-bit value by >= 32 must be handled explicitly
	hCPU->gpr[rA] = (shiftAmount > 31) ? 0 : (hCPU->gpr[rD] >> shiftAmount);
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// srawi[.]: arithmetic right shift of gpr[rS] by the immediate SH, result in gpr[rA].
// xer_ca is set when the input is negative and at least one 1-bit is shifted out.
static void PPCInterpreter_SRAWI(PPCInterpreter_t* hCPU, uint32 opcode)
{
	sint32 rS, rA;
	uint32 SH;
	PPC_OPC_TEMPL_X(opcode, rS, rA, SH);
	uint32 value = hCPU->gpr[rS];
	uint32 carry = 0;
	if ((value & 0x80000000) != 0)
	{
		// negative input: shift one bit at a time, refilling the sign bit and
		// remembering whether any 1-bit fell off the low end
		for (uint32 step = 0; step < SH; ++step)
		{
			carry |= (value & 1);
			value = (value >> 1) | 0x80000000;
		}
	}
	else
	{
		// non-negative input never produces a carry
		value = (SH > 31) ? 0 : (value >> SH);
	}
	hCPU->gpr[rA] = value;
	hCPU->xer_ca = carry ? 1 : 0;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// Count the leading zero bits of a 32-bit value (returns 32 for an input of 0).
static uint32 _CNTLZW(uint32 v)
{
	if (v == 0)
		return 32;
	// binary search for the index of the highest set bit: at each step check whether
	// anything lives in the upper half of the remaining window and, if so, move into it
	uint32 highestSetBit = 0;
	for (uint32 halfWidth = 16; halfWidth != 0; halfWidth >>= 1)
	{
		if ((v >> halfWidth) != 0)
		{
			highestSetBit += halfWidth;
			v >>= halfWidth;
		}
	}
	return 31 - highestSetBit;
}
|
||||
|
||||
// cntlzw[.]: gpr[rA] = number of leading zero bits in gpr[rD] (the template's rD slot is the source register here)
static void PPCInterpreter_CNTLZW(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	PPC_ASSERT(rB == 0);
	const uint32 source = hCPU->gpr[rD];
	hCPU->gpr[rA] = _CNTLZW(source);
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// extsb[.]: gpr[rA] = low byte of gpr[rD], sign-extended to 32 bits
static void PPCInterpreter_EXTSB(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	PPC_ASSERT(rB == 0);
	const sint8 lowByte = (sint8)hCPU->gpr[rD];
	hCPU->gpr[rA] = (uint32)(sint32)lowByte;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// extsh[.]: gpr[rA] = low halfword of gpr[rD], sign-extended to 32 bits
static void PPCInterpreter_EXTSH(PPCInterpreter_t* hCPU, uint32 opcode)
{
	PPC_OPC_TEMPL3_XO();
	PPC_ASSERT(rB == 0);
	const sint16 lowHalf = (sint16)hCPU->gpr[rD];
	hCPU->gpr[rA] = (uint32)(sint32)lowHalf;
	if (opHasRC())
		ppc_update_cr0(hCPU, hCPU->gpr[rA]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// cmp: signed comparison of gpr[rA] with gpr[rB].
// The four bits of the selected CR field are cleared, then exactly one of LT/GT/EQ is set
// and SO mirrors XER[SO].
static void PPCInterpreter_CMP(PPCInterpreter_t* hCPU, uint32 opcode)
{
	uint32 cr;
	sint32 rA, rB;
	PPC_OPC_TEMPL_X(opcode, cr, rA, rB);
	cr >>= 2; // the CR field index sits in the upper bits of the decoded field
	const sint32 lhs = hCPU->gpr[rA];
	const sint32 rhs = hCPU->gpr[rB];
	auto* crField = &hCPU->cr[cr * 4];
	for (uint32 bit = 0; bit < 4; bit++)
		crField[bit] = 0;
	if (lhs < rhs)
		crField[CR_BIT_LT] = 1;
	else if (lhs > rhs)
		crField[CR_BIT_GT] = 1;
	else
		crField[CR_BIT_EQ] = 1;
	if ((hCPU->spr.XER & XER_SO) != 0)
		crField[CR_BIT_SO] = 1;
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// cmpl: unsigned comparison of gpr[rA] with gpr[rB].
// The four bits of the selected CR field are cleared, then exactly one of LT/GT/EQ is set
// and SO mirrors XER[SO].
static void PPCInterpreter_CMPL(PPCInterpreter_t* hCPU, uint32 opcode)
{
	uint32 cr;
	int rA, rB;
	PPC_OPC_TEMPL_X(opcode, cr, rA, rB);
	cr >>= 2; // the CR field index sits in the upper bits of the decoded field
	const uint32 lhs = hCPU->gpr[rA];
	const uint32 rhs = hCPU->gpr[rB];
	auto* crField = &hCPU->cr[cr * 4];
	for (uint32 bit = 0; bit < 4; bit++)
		crField[bit] = 0;
	if (lhs < rhs)
		crField[CR_BIT_LT] = 1;
	else if (lhs > rhs)
		crField[CR_BIT_GT] = 1;
	else
		crField[CR_BIT_EQ] = 1;
	if ((hCPU->spr.XER & XER_SO) != 0)
		crField[CR_BIT_SO] = 1;
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// cmpi: signed comparison of gpr[rA] with the immediate from the signed-immediate template.
// The four bits of the selected CR field are cleared, then exactly one of LT/GT/EQ is set
// and SO mirrors XER[SO].
static void PPCInterpreter_CMPI(PPCInterpreter_t* hCPU, uint32 opcode)
{
	uint32 cr;
	int rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(opcode, cr, rA, imm);
	cr >>= 2; // the CR field index sits in the upper bits of the decoded field
	const sint32 lhs = hCPU->gpr[rA];
	const sint32 rhs = imm;
	auto* crField = &hCPU->cr[cr * 4];
	for (uint32 bit = 0; bit < 4; bit++)
		crField[bit] = 0;
	if (lhs < rhs)
		crField[CR_BIT_LT] = 1;
	else if (lhs > rhs)
		crField[CR_BIT_GT] = 1;
	else
		crField[CR_BIT_EQ] = 1;
	if (hCPU->spr.XER & XER_SO)
		crField[CR_BIT_SO] = 1;
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// cmpli: unsigned comparison of gpr[rA] with the immediate from the unsigned-immediate template.
// The four bits of the selected CR field are cleared, then exactly one of LT/GT/EQ is set
// and SO mirrors XER[SO].
static void PPCInterpreter_CMPLI(PPCInterpreter_t* hCPU, uint32 opcode)
{
	uint32 cr;
	int rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_UImm(opcode, cr, rA, imm);
	cr >>= 2; // the CR field index sits in the upper bits of the decoded field
	const uint32 lhs = hCPU->gpr[rA];
	const uint32 rhs = imm;
	auto* crField = &hCPU->cr[cr * 4];
	for (uint32 bit = 0; bit < 4; bit++)
		crField[bit] = 0;
	if (lhs < rhs)
		crField[CR_BIT_LT] = 1;
	else if (lhs > rhs)
		crField[CR_BIT_GT] = 1;
	else
		crField[CR_BIT_EQ] = 1;
	if (hCPU->spr.XER & XER_SO)
		crField[CR_BIT_SO] = 1;
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
700
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp
Normal file
700
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp
Normal file
|
@ -0,0 +1,700 @@
|
|||
#include "../PPCState.h"
|
||||
#include "PPCInterpreterInternal.h"
|
||||
#include "PPCInterpreterHelper.h"
|
||||
|
||||
#include<math.h>
|
||||
|
||||
// floating point utility
|
||||
|
||||
#include <limits>
|
||||
#include <array>
|
||||
|
||||
// field widths of an IEEE-754 double
constexpr int ieee_double_e_bits = 11; // exponent bits
constexpr int ieee_double_m_bits = 52; // mantissa bits

// frsqrte lookup: number of index bits (the highest bit is the LSB of the exponent)
constexpr int espresso_frsqrte_i_bits = 5;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32 offset;
|
||||
uint32 step;
|
||||
}espresso_frsqrte_entry_t;
|
||||
|
||||
espresso_frsqrte_entry_t frsqrteLookupTable[32] =
|
||||
{
|
||||
{0x1a7e800, 0x568},{0x17cb800, 0x4f3},{0x1552800, 0x48d},{0x130c000, 0x435},
|
||||
{0x10f2000, 0x3e7},{0xeff000, 0x3a2},{0xd2e000, 0x365},{0xb7c000, 0x32e},
|
||||
{0x9e5000, 0x2fc},{0x867000, 0x2d0},{0x6ff000, 0x2a8},{0x5ab800, 0x283},
|
||||
{0x46a000, 0x261},{0x339800, 0x243},{0x218800, 0x226},{0x105800, 0x20b},
|
||||
{0x3ffa000, 0x7a4},{0x3c29000, 0x700},{0x38aa000, 0x670},{0x3572000, 0x5f2},
|
||||
{0x3279000, 0x584},{0x2fb7000, 0x524},{0x2d26000, 0x4cc},{0x2ac0000, 0x47e},
|
||||
{0x2881000, 0x43a},{0x2665000, 0x3fa},{0x2468000, 0x3c2},{0x2287000, 0x38e},
|
||||
{0x20c1000, 0x35e},{0x1f12000, 0x332},{0x1d79000, 0x30a},{0x1bf4000, 0x2e6},
|
||||
};
|
||||
|
||||
double frsqrte_espresso(double input)
|
||||
{
|
||||
unsigned long long x = *(unsigned long long*)&input;
|
||||
|
||||
// 0.0 and -0.0
|
||||
if ((x << 1) == 0)
|
||||
{
|
||||
// result is inf or -inf
|
||||
x &= ~0x7FFFFFFFFFFFFFFF;
|
||||
x |= 0x7FF0000000000000;
|
||||
return *(double*)&x;
|
||||
}
|
||||
// get exponent
|
||||
uint32 e = (x >> ieee_double_m_bits) & ((1ull << ieee_double_e_bits) - 1ull);
|
||||
// NaN or INF
|
||||
if (e == 0x7FF)
|
||||
{
|
||||
if ((x&((1ull << ieee_double_m_bits) - 1)) == 0)
|
||||
{
|
||||
// negative INF returns +NaN
|
||||
if ((sint64)x < 0)
|
||||
{
|
||||
x = 0x7FF8000000000000;
|
||||
return *(double*)&x;
|
||||
}
|
||||
// positive INF returns +0.0
|
||||
return 0.0;
|
||||
}
|
||||
// result is NaN with same sign and same mantissa (todo: verify)
|
||||
return *(double*)&x;
|
||||
}
|
||||
// negative number (other than -0.0)
|
||||
if ((sint64)x < 0)
|
||||
{
|
||||
// result is positive NaN
|
||||
x = 0x7FF8000000000000;
|
||||
return *(double*)&x;
|
||||
}
|
||||
// todo: handle denormals
|
||||
|
||||
// get index (lsb of exponent, remaining bits of mantissa)
|
||||
uint32 idx = (x >> (ieee_double_m_bits - espresso_frsqrte_i_bits + 1ull))&((1 << espresso_frsqrte_i_bits) - 1);
|
||||
// get step multiplier
|
||||
uint32 stepMul = (x >> (ieee_double_m_bits - espresso_frsqrte_i_bits + 1 - 11))&((1 << 11) - 1);
|
||||
|
||||
sint32 sum = frsqrteLookupTable[idx].offset - frsqrteLookupTable[idx].step * stepMul;
|
||||
|
||||
e = 1023 - ((e - 1021) >> 1);
|
||||
x &= ~(((1ull << ieee_double_e_bits) - 1ull) << ieee_double_m_bits);
|
||||
x |= ((unsigned long long)e << ieee_double_m_bits);
|
||||
|
||||
x &= ~((1ull << ieee_double_m_bits) - 1ull);
|
||||
x += ((unsigned long long)sum << 26ull);
|
||||
|
||||
return *(double*)&x;
|
||||
}
|
||||
|
||||
const int espresso_fres_i_bits = 5; // index bits
|
||||
const int espresso_fres_s_bits = 10; // step multiplier bits
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32 offset;
|
||||
uint32 step;
|
||||
}espresso_fres_entry_t;
|
||||
|
||||
espresso_fres_entry_t fresLookupTable[32] =
|
||||
{
|
||||
// table calculated by fres_gen_table()
|
||||
{0x7ff800, 0x3e1}, {0x783800, 0x3a7}, {0x70ea00, 0x371}, {0x6a0800, 0x340},
|
||||
{0x638800, 0x313}, {0x5d6200, 0x2ea}, {0x579000, 0x2c4}, {0x520800, 0x2a0},
|
||||
{0x4cc800, 0x27f}, {0x47ca00, 0x261}, {0x430800, 0x245}, {0x3e8000, 0x22a},
|
||||
{0x3a2c00, 0x212}, {0x360800, 0x1fb}, {0x321400, 0x1e5}, {0x2e4a00, 0x1d1},
|
||||
{0x2aa800, 0x1be}, {0x272c00, 0x1ac}, {0x23d600, 0x19b}, {0x209e00, 0x18b},
|
||||
{0x1d8800, 0x17c}, {0x1a9000, 0x16e}, {0x17ae00, 0x15b}, {0x14f800, 0x15b},
|
||||
{0x124400, 0x143}, {0xfbe00, 0x143}, {0xd3800, 0x12d}, {0xade00, 0x12d},
|
||||
{0x88400, 0x11a}, {0x65000, 0x11a}, {0x41c00, 0x108}, {0x20c00, 0x106}
|
||||
};
|
||||
|
||||
double fres_espresso(double input)
|
||||
{
|
||||
// based on testing we know that fres uses only the first 15 bits of the mantissa
|
||||
// seee eeee eeee mmmm mmmm mmmm mmmx xxxx .... (s = sign, e = exponent, m = mantissa, x = not used)
|
||||
// the mantissa bits are interpreted as following:
|
||||
// 0000 0000 0000 iiii ifff ffff fff0 ... (i = table look up index , f = step multiplier)
|
||||
unsigned long long x = *(unsigned long long*)&input;
|
||||
|
||||
// get index
|
||||
uint32 idx = (x >> (ieee_double_m_bits - espresso_fres_i_bits))&((1 << espresso_fres_i_bits) - 1);
|
||||
// get step multiplier
|
||||
uint32 stepMul = (x >> (ieee_double_m_bits - espresso_fres_i_bits - 10))&((1 << 10) - 1);
|
||||
|
||||
|
||||
uint32 sum = fresLookupTable[idx].offset - (fresLookupTable[idx].step * stepMul + 1) / 2;
|
||||
|
||||
// get exponent
|
||||
uint32 e = (x >> ieee_double_m_bits) & ((1ull << ieee_double_e_bits) - 1ull);
|
||||
if (e == 0)
|
||||
{
|
||||
// todo?
|
||||
//x &= 0x7FFFFFFFFFFFFFFFull;
|
||||
x |= 0x7FF0000000000000ull;
|
||||
return *(double*)&x;
|
||||
}
|
||||
else if (e == 0x7ff) // NaN or INF
|
||||
{
|
||||
if ((x&((1ull << ieee_double_m_bits) - 1)) == 0)
|
||||
{
|
||||
// negative INF returns -0.0
|
||||
if ((sint64)x < 0)
|
||||
{
|
||||
x = 0x8000000000000000;
|
||||
return *(double*)&x;
|
||||
}
|
||||
// positive INF returns +0.0
|
||||
return 0.0;
|
||||
}
|
||||
// result is NaN with same sign and same mantissa (todo: verify)
|
||||
return *(double*)&x;
|
||||
}
|
||||
// todo - needs more testing (especially NaN and INF values)
|
||||
|
||||
e = 2045 - e;
|
||||
x &= ~(((1ull << ieee_double_e_bits) - 1ull) << ieee_double_m_bits);
|
||||
x |= ((unsigned long long)e << ieee_double_m_bits);
|
||||
|
||||
x &= ~((1ull << ieee_double_m_bits) - 1ull);
|
||||
x += ((unsigned long long)sum << 29ull);
|
||||
|
||||
return *(double*)&x;
|
||||
}
|
||||
|
||||
// Unordered floating point compare of a and b.
// Clears the four CR bits starting at index crfD, sets exactly one of them (SO for NaN
// operands, otherwise LT/GT/EQ), raises FPSCR_VXSNAN if either operand is a signaling NaN
// and stores the 4-bit compare result in FPSCR bits 12..15.
void fcmpu_espresso(PPCInterpreter_t* hCPU, int crfD, double a, double b)
{
	for (int bit = 0; bit < 4; bit++)
		ppc_setCRBit(hCPU, crfD + bit, 0);

	uint32 compareCode;
	int resultBit;
	if (IS_NAN(*(uint64*)&a) || IS_NAN(*(uint64*)&b))
	{
		compareCode = 1;
		resultBit = CR_BIT_SO;
	}
	else if (a < b)
	{
		compareCode = 8;
		resultBit = CR_BIT_LT;
	}
	else if (a > b)
	{
		compareCode = 4;
		resultBit = CR_BIT_GT;
	}
	else
	{
		compareCode = 2;
		resultBit = CR_BIT_EQ;
	}
	ppc_setCRBit(hCPU, crfD + resultBit, 1);

	if (IS_SNAN(*(uint64*)&a) || IS_SNAN(*(uint64*)&b))
		hCPU->fpscr |= FPSCR_VXSNAN;

	hCPU->fpscr = (hCPU->fpscr & 0xffff0fff) | (compareCode << 12);
}
|
||||
|
||||
// fmr: copies the double value of frB into frD.
void PPCInterpreter_FMR(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int dstReg, unusedField, srcReg;
	PPC_OPC_TEMPL_X(Opcode, dstReg, unusedField, srcReg);
	PPC_ASSERT(unusedField==0);

	hCPU->fpr[dstReg].fpr = hCPU->fpr[srcReg].fpr;
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fsel: frD = (frA >= 0.0) ? frC : frB. A NaN in frA fails the comparison and selects frB.
// The record form (Rc=1, CR1 update) is not implemented.
void PPCInterpreter_FSEL(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	if ( hCPU->fpr[frA].fp0 >= -0.0f )
		hCPU->fpr[frD] = hCPU->fpr[frC];
	else
		hCPU->fpr[frD] = hCPU->fpr[frB];
	// CR1 update is not implemented, so the supported case is Rc == 0
	// (the previous condition asserted the opposite)
	PPC_ASSERT((Opcode & PPC_OPC_RC) == 0);

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fctiwz: converts the double in frB to a signed 32-bit integer, rounding toward zero.
// Values above INT32_MAX saturate to 0x7FFFFFFF, values below INT32_MIN and NaN yield 0x80000000.
// The integer is stored in the low word of frD; the high word is 0xFFF80000, or 0xFFF80001 when
// the result is zero and the source was negative.
void PPCInterpreter_FCTIWZ(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	int frD, frA, frB;
	PPC_OPC_TEMPL_X(Opcode, frD, frA, frB);
	PPC_ASSERT(frA==0);

	double b = hCPU->fpr[frB].fpr;
	uint64 v;
	if (b != b)
	{
		// NaN: converting it with a cast is undefined behavior in C++ and the outcome
		// differs between host architectures, so the result is produced explicitly
		v = (uint64)0x80000000;
	}
	else if (b > (double)0x7FFFFFFF)
	{
		v = (uint64)0x7FFFFFFF;
	}
	else if (b < -(double)0x80000000)
	{
		v = (uint64)0x80000000;
	}
	else
	{
		v = (uint64)(uint32)(sint32)b;
	}

	hCPU->fpr[frD].guint = 0xFFF8000000000000ULL | v;
	// zero result from a negative source: set bit 32
	if (v == 0 && ((*(uint64*)&b) >> 63))
		hCPU->fpr[frD].guint |= 0x100000000ull;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fctiw: converts the double in frB to a signed 32-bit integer using round-to-nearest
// (ties to even). Values above INT32_MAX saturate to 0x7FFFFFFF, values below INT32_MIN
// and NaN yield 0x80000000.
// The integer is stored in the low word of frD; the high word is 0xFFF80000, or 0xFFF80001 when
// the result is zero and the source was negative.
void PPCInterpreter_FCTIW(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int frD, frA, frB;
	PPC_OPC_TEMPL_X(Opcode, frD, frA, frB);
	PPC_ASSERT(frA==0);

	double b = hCPU->fpr[frB].fpr;
	uint64 v;
	if (b != b)
	{
		// NaN: converting it with a cast is undefined behavior in C++ and the outcome
		// differs between host architectures, so the result is produced explicitly
		v = (uint64)0x80000000;
	}
	else if (b > (double)0x7FFFFFFF)
	{
		v = (uint64)0x7FFFFFFF;
	}
	else if (b < -(double)0x80000000)
	{
		v = (uint64)0x80000000;
	}
	else
	{
		// todo: Support for other rounding modes than NEAR
		// b is within sint32 range here, so floor(b) converts exactly and the
		// fractional part (b - floor(b)) is computed without rounding error
		double lower = floor(b);
		double fraction = b - lower;
		sint32 i = (sint32)lower;
		// round up when above the midpoint, or exactly at the midpoint with an odd lower neighbour
		if (fraction > 0.5 || (fraction == 0.5 && (i & 1) != 0))
		{
			i++;
		}
		// zero-extend so a negative result does not overwrite the upper word
		v = (uint64)(uint32)i;
	}
	hCPU->fpr[frD].guint = 0xFFF8000000000000ULL | v;
	// zero result from a negative source: set bit 32
	if (v == 0 && ((*(uint64*)&b) >> 63))
		hCPU->fpr[frD].guint |= 0x100000000ull;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fneg: frD = frB with the sign bit inverted (pure bit operation, NaNs included).
// The record form (Rc=1, CR1 update) is not implemented.
void PPCInterpreter_FNEG(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int frD, frA, frB;
	PPC_OPC_TEMPL_X(Opcode, frD, frA, frB);
	PPC_ASSERT(frA==0);

	hCPU->fpr[frD].guint = hCPU->fpr[frB].guint ^ (1ULL << 63);

	// CR1 update is not implemented, so the supported case is Rc == 0
	// (the previous condition asserted the opposite)
	PPC_ASSERT((Opcode & PPC_OPC_RC) == 0);

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// frsp: rounds the double in frB to single precision and stores it in frD.
// When PPC_PSE is enabled the rounded value is written to both fp0 and fp1.
void PPCInterpreter_FRSP(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int dstReg, unusedField, srcReg;
	PPC_OPC_TEMPL_X(Opcode, dstReg, unusedField, srcReg);
	PPC_ASSERT(unusedField==0);

	if( !PPC_PSE )
	{
		hCPU->fpr[dstReg].fpr = (float)hCPU->fpr[srcReg].fpr;
	}
	else
	{
		hCPU->fpr[dstReg].fp0 = (float)hCPU->fpr[srcReg].fpr;
		hCPU->fpr[dstReg].fp1 = hCPU->fpr[dstReg].fp0;
	}

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// frsqrte: frD = frsqrte_espresso(frB).
void PPCInterpreter_FRSQRTE(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int dstReg, unusedA, srcReg, unusedC;
	PPC_OPC_TEMPL_A(Opcode, dstReg, unusedA, srcReg, unusedC);
	PPC_ASSERT(unusedA==0 && unusedC==0);

	const auto estimate = frsqrte_espresso(hCPU->fpr[srcReg].fpr);
	hCPU->fpr[dstReg].fpr = estimate;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fres: frD = fres_espresso(frB). When PPC_PSE is enabled the result is also copied to fp1.
void PPCInterpreter_FRES(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int dstReg, unusedA, srcReg, unusedC;
	PPC_OPC_TEMPL_A(Opcode, dstReg, unusedA, srcReg, unusedC);
	PPC_ASSERT(unusedA==0 && unusedC==0);

	const auto estimate = fres_espresso(hCPU->fpr[srcReg].fpr);
	hCPU->fpr[dstReg].fpr = estimate;

	if(PPC_PSE)
	{
		hCPU->fpr[dstReg].fp1 = hCPU->fpr[dstReg].fp0;
	}

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// Floating point ALU
|
||||
|
||||
// fabs: frD = frB with the sign bit cleared (pure bit operation).
void PPCInterpreter_FABS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int dstReg, unusedField, srcReg;
	PPC_OPC_TEMPL_X(Opcode, dstReg, unusedField, srcReg);
	PPC_ASSERT(unusedField==0);

	hCPU->fpr[dstReg].guint = hCPU->fpr[srcReg].guint & 0x7FFFFFFFFFFFFFFFULL;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fnabs: frD = frB with the sign bit set (pure bit operation).
void PPCInterpreter_FNABS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int dstReg, unusedField, srcReg;
	PPC_OPC_TEMPL_X(Opcode, dstReg, unusedField, srcReg);
	PPC_ASSERT(unusedField==0);

	hCPU->fpr[dstReg].guint = hCPU->fpr[srcReg].guint | (1ULL << 63);

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fadd: frD = frA + frB.
void PPCInterpreter_FADD(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int dstReg, lhsReg, rhsReg, unusedC;
	PPC_OPC_TEMPL_A(Opcode, dstReg, lhsReg, rhsReg, unusedC);
	PPC_ASSERT(unusedC==0);

	const auto sum = hCPU->fpr[lhsReg].fpr + hCPU->fpr[rhsReg].fpr;
	hCPU->fpr[dstReg].fpr = sum;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fdiv: frD = frA / frB.
void PPCInterpreter_FDIV(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int dstReg, dividendReg, divisorReg, unusedC;
	PPC_OPC_TEMPL_A(Opcode, dstReg, dividendReg, divisorReg, unusedC);
	PPC_ASSERT(unusedC==0);

	const auto quotient = hCPU->fpr[dividendReg].fpr / hCPU->fpr[divisorReg].fpr;
	hCPU->fpr[dstReg].fpr = quotient;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fsub: frD = frA - frB.
void PPCInterpreter_FSUB(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int dstReg, lhsReg, rhsReg, unusedC;
	PPC_OPC_TEMPL_A(Opcode, dstReg, lhsReg, rhsReg, unusedC);
	PPC_ASSERT(unusedC==0);

	const auto difference = hCPU->fpr[lhsReg].fpr - hCPU->fpr[rhsReg].fpr;
	hCPU->fpr[dstReg].fpr = difference;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fmul: frD = frA * frC. The frB field of the opcode is unused.
void PPCInterpreter_FMUL(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	// frC is the multiplier operand; the field that must be zero is frB
	PPC_ASSERT(frB == 0);

	hCPU->fpr[frD].fpr = hCPU->fpr[frA].fpr * hCPU->fpr[frC].fpr;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fmadd: frD = frA * frC + frB.
void PPCInterpreter_FMADD(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int dstReg, mulLhsReg, addReg, mulRhsReg;
	PPC_OPC_TEMPL_A(Opcode, dstReg, mulLhsReg, addReg, mulRhsReg);

	const auto result = hCPU->fpr[mulLhsReg].fpr * hCPU->fpr[mulRhsReg].fpr + hCPU->fpr[addReg].fpr;
	hCPU->fpr[dstReg].fpr = result;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fnmadd: frD = -(frA * frC + frB).
void PPCInterpreter_FNMADD(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int dstReg, mulLhsReg, addReg, mulRhsReg;
	PPC_OPC_TEMPL_A(Opcode, dstReg, mulLhsReg, addReg, mulRhsReg);

	const auto result = -(hCPU->fpr[mulLhsReg].fpr * hCPU->fpr[mulRhsReg].fpr + hCPU->fpr[addReg].fpr);
	hCPU->fpr[dstReg].fpr = result;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fmsub: frD = frA * frC - frB.
void PPCInterpreter_FMSUB(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int dstReg, mulLhsReg, subReg, mulRhsReg;
	PPC_OPC_TEMPL_A(Opcode, dstReg, mulLhsReg, subReg, mulRhsReg);

	const auto result = (hCPU->fpr[mulLhsReg].fpr * hCPU->fpr[mulRhsReg].fpr - hCPU->fpr[subReg].fpr);
	hCPU->fpr[dstReg].fpr = result;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fnmsub: frD = -(frA * frC - frB).
void PPCInterpreter_FNMSUB(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int dstReg, mulLhsReg, subReg, mulRhsReg;
	PPC_OPC_TEMPL_A(Opcode, dstReg, mulLhsReg, subReg, mulRhsReg);

	const auto result = -(hCPU->fpr[mulLhsReg].fpr * hCPU->fpr[mulRhsReg].fpr - hCPU->fpr[subReg].fpr);
	hCPU->fpr[dstReg].fpr = result;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// Move
|
||||
|
||||
// mffs: stores the FPSCR, zero-extended to 64 bits, into the raw bits of frD.
void PPCInterpreter_MFFS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int dstReg, unusedA, unusedB;
	PPC_OPC_TEMPL_X(Opcode, dstReg, unusedA, unusedB);
	PPC_ASSERT(unusedA==0 && unusedB==0);

	const uint64 fpscrValue = (uint64)hCPU->fpscr;
	hCPU->fpr[dstReg].guint = fpscrValue;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// mtfsf: copies selected 4-bit fields from the low word of frB into the FPSCR.
// Each bit of the 8-bit field mask selects one nibble (mask bit 0x80 -> bits 28..31,
// mask bit 0x01 -> bits 0..3). The write is only stored; a one-time message is logged
// because FPSCR writes are otherwise unsupported.
// The record form (Rc=1, CR1 update) is not implemented.
void PPCInterpreter_MTFSF(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int frB;
	uint32 fm, FM;
	PPC_OPC_TEMPL_XFL(Opcode, frB, fm);
	// expand every field-mask bit into a mask covering its nibble
	FM = 0;
	for (uint32 field = 0; field < 8; field++)
	{
		if (fm & (1u << field))
			FM |= (0xFu << (field * 4));
	}
	hCPU->fpscr = (hCPU->fpr[frB].guint & FM) | (hCPU->fpscr & ~FM);

	// CR1 update is not implemented, so the supported case is Rc == 0
	// (the previous condition asserted the opposite)
	PPC_ASSERT((Opcode & PPC_OPC_RC) == 0);

	static bool logFPSCRWriteOnce = false;
	if( logFPSCRWriteOnce == false )
	{
		forceLog_printf("Unsupported write to FPSCR\n");
		logFPSCRWriteOnce = true;
	}
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// single precision
|
||||
|
||||
// fadds: frD = (float)(frA + frB). When PPC_PSE is enabled the result is also copied to fp1.
// The frC field of the opcode is unused.
void PPCInterpreter_FADDS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	// frB is an operand of the addition; the field that must be zero is frC
	PPC_ASSERT(frC == 0);

	// todo: check for RC

	hCPU->fpr[frD].fpr = (float)(hCPU->fpr[frA].fpr + hCPU->fpr[frB].fpr);
	if (PPC_PSE)
		hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fsubs: frD = (float)(frA - frB). When PPC_PSE is enabled the result is also copied to fp1.
// The frC field of the opcode is unused.
void PPCInterpreter_FSUBS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	// frB is an operand of the subtraction; the field that must be zero is frC
	PPC_ASSERT(frC == 0);

	hCPU->fpr[frD].fpr = (float)(hCPU->fpr[frA].fpr - hCPU->fpr[frB].fpr);
	if (PPC_PSE)
		hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fdivs: frD = (float)(frA / frB). When PPC_PSE is enabled the result is also copied to fp1.
// The frC field of the opcode is unused.
void PPCInterpreter_FDIVS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(Opcode, frD, frA, frB, frC);
	// frB is the divisor; the field that must be zero is frC
	PPC_ASSERT(frC==0);

	hCPU->fpr[frD].fpr = (float)(hCPU->fpr[frA].fpr / hCPU->fpr[frB].fpr);
	if( PPC_PSE )
		hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fmuls: frD = (float)(frA * roundTo25BitAccuracy(frC)).
// When PPC_PSE is enabled the result is also copied to fp1.
void PPCInterpreter_FMULS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int dstReg, lhsReg, unusedB, rhsReg;
	PPC_OPC_TEMPL_A(Opcode, dstReg, lhsReg, unusedB, rhsReg);
	PPC_ASSERT(unusedB == 0);

	const auto multiplier = roundTo25BitAccuracy(hCPU->fpr[rhsReg].fpr);
	hCPU->fpr[dstReg].fpr = (float)(hCPU->fpr[lhsReg].fpr * multiplier);
	if (PPC_PSE)
	{
		hCPU->fpr[dstReg].fp1 = hCPU->fpr[dstReg].fp0;
	}

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fmadds: frD = (float)(frA * roundTo25BitAccuracy(frC) + frB).
// When PPC_PSE is enabled the result is also copied to fp1.
void PPCInterpreter_FMADDS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int dstReg, mulLhsReg, addReg, mulRhsReg;
	PPC_OPC_TEMPL_A(Opcode, dstReg, mulLhsReg, addReg, mulRhsReg);

	const auto multiplier = roundTo25BitAccuracy(hCPU->fpr[mulRhsReg].fpr);
	hCPU->fpr[dstReg].fpr = (float)(hCPU->fpr[mulLhsReg].fpr * multiplier + hCPU->fpr[addReg].fpr);
	if (PPC_PSE)
	{
		hCPU->fpr[dstReg].fp1 = hCPU->fpr[dstReg].fp0;
	}

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fnmadds: frD = (float)-(frA * roundTo25BitAccuracy(frC) + frB).
// When PPC_PSE is enabled the result is also copied to fp1.
void PPCInterpreter_FNMADDS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int dstReg, mulLhsReg, addReg, mulRhsReg;
	PPC_OPC_TEMPL_A(Opcode, dstReg, mulLhsReg, addReg, mulRhsReg);

	const auto multiplier = roundTo25BitAccuracy(hCPU->fpr[mulRhsReg].fpr);
	hCPU->fpr[dstReg].fpr = (float)-(hCPU->fpr[mulLhsReg].fpr * multiplier + hCPU->fpr[addReg].fpr);
	if (PPC_PSE)
	{
		hCPU->fpr[dstReg].fp1 = hCPU->fpr[dstReg].fp0;
	}

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fmsubs: frD.fp0 = (float)(frA.fp0 * roundTo25BitAccuracy(frC.fp0) - frB.fp0).
// When PPC_PSE is enabled the result is also copied to fp1.
void PPCInterpreter_FMSUBS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int dstReg, mulLhsReg, subReg, mulRhsReg;
	PPC_OPC_TEMPL_A(Opcode, dstReg, mulLhsReg, subReg, mulRhsReg);

	const auto multiplier = roundTo25BitAccuracy(hCPU->fpr[mulRhsReg].fp0);
	hCPU->fpr[dstReg].fp0 = (float)(hCPU->fpr[mulLhsReg].fp0 * multiplier - hCPU->fpr[subReg].fp0);
	if (PPC_PSE)
	{
		hCPU->fpr[dstReg].fp1 = hCPU->fpr[dstReg].fp0;
	}

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fnmsubs: frD.fp0 = (float)-(frA.fp0 * roundTo25BitAccuracy(frC.fp0) - frB.fp0).
// When PPC_PSE is enabled the result is also copied to fp1.
void PPCInterpreter_FNMSUBS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int dstReg, mulLhsReg, subReg, mulRhsReg;
	PPC_OPC_TEMPL_A(Opcode, dstReg, mulLhsReg, subReg, mulRhsReg);

	const auto multiplier = roundTo25BitAccuracy(hCPU->fpr[mulRhsReg].fp0);
	hCPU->fpr[dstReg].fp0 = (float)-(hCPU->fpr[mulLhsReg].fp0 * multiplier - hCPU->fpr[subReg].fp0);
	if (PPC_PSE)
	{
		hCPU->fpr[dstReg].fp1 = hCPU->fpr[dstReg].fp0;
	}

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// Compare
|
||||
|
||||
// fcmpo: ordered floating point compare of frA and frB.
// Writes the result to CR field crfD (SO for NaN operands, otherwise LT/GT/EQ) and to
// FPSCR bits 12..15. Invalid-operation flags:
//   - a signaling NaN operand sets VXSNAN, and additionally VXVC when VE is clear
//   - otherwise a quiet NaN operand sets VXVC
//   - ordered operands leave both flags untouched
void PPCInterpreter_FCMPO(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int crfD, frA, frB;
	PPC_OPC_TEMPL_X(Opcode, crfD, frA, frB);
	// the decoded field is a CR bit index; convert it to a CR field index
	crfD >>= 2;
	hCPU->cr[crfD*4+0] = 0;
	hCPU->cr[crfD*4+1] = 0;
	hCPU->cr[crfD*4+2] = 0;
	hCPU->cr[crfD*4+3] = 0;

	uint32 c;
	if(IS_NAN(hCPU->fpr[frA].guint) || IS_NAN(hCPU->fpr[frB].guint))
	{
		c = 1;
		hCPU->cr[crfD*4+CR_BIT_SO] = 1;
	}
	else if(hCPU->fpr[frA].fpr < hCPU->fpr[frB].fpr)
	{
		c = 8;
		hCPU->cr[crfD*4+CR_BIT_LT] = 1;
	}
	else if(hCPU->fpr[frA].fpr > hCPU->fpr[frB].fpr)
	{
		c = 4;
		hCPU->cr[crfD*4+CR_BIT_GT] = 1;
	}
	else
	{
		c = 2;
		hCPU->cr[crfD*4+CR_BIT_EQ] = 1;
	}

	hCPU->fpscr = (hCPU->fpscr & 0xffff0fff) | (c << 12);

	if (IS_SNAN (hCPU->fpr[frA].guint) || IS_SNAN (hCPU->fpr[frB].guint))
	{
		hCPU->fpscr |= FPSCR_VXSNAN;
		// VXVC is only raised together with VXSNAN when the invalid-operation exception is disabled
		if (!(hCPU->fpscr & FPSCR_VE))
			hCPU->fpscr |= FPSCR_VXVC;
	}
	else if (IS_QNAN (hCPU->fpr[frA].guint) || IS_QNAN (hCPU->fpr[frB].guint))
	{
		hCPU->fpscr |= FPSCR_VXVC;
	}

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// fcmpu: unordered compare of frA.fp0 and frB.fp0, delegated to fcmpu_espresso().
// The decoded crfD value is a CR bit index and is expected to be a multiple of 4.
void PPCInterpreter_FCMPU(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	int crBitIndex, lhsReg, rhsReg;
	PPC_OPC_TEMPL_X(Opcode, crBitIndex, lhsReg, rhsReg);
	cemu_assert_debug((crBitIndex % 4) == 0);

	fcmpu_espresso(hCPU, crBitIndex, hCPU->fpr[lhsReg].fp0, hCPU->fpr[rhsReg].fp0);
	PPCInterpreter_nextInstruction(hCPU);
}
|
64
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterHLE.cpp
Normal file
64
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterHLE.cpp
Normal file
|
@ -0,0 +1,64 @@
|
|||
#include "../PPCState.h"
|
||||
#include "PPCInterpreterInternal.h"
|
||||
#include "PPCInterpreterHelper.h"
|
||||
|
||||
// Messages that PPCInterpreter_handleUnsupportedHLECall() has already logged; used so each message is logged only once
std::unordered_set<std::string> sUnsupportedHLECalls;
|
||||
|
||||
// Handles a call to a library function without HLE implementation.
// The function name is read from guest memory at instructionPointer + 8, a message is
// logged the first time each name is seen, r3 is set to 0 and execution continues
// with the next instruction.
void PPCInterpreter_handleUnsupportedHLECall(PPCInterpreter_t* hCPU)
{
	const char* calledFunctionName = (char*)memory_getPointerFromVirtualOffset(hCPU->instructionPointer + 8);
	std::string logMessage = fmt::format("Unsupported lib call: {}", calledFunctionName);

	const bool alreadyLogged = (sUnsupportedHLECalls.count(logMessage) != 0);
	if (!alreadyLogged)
	{
		cemuLog_log(LogType::UnsupportedAPI, "{}", logMessage);
		sUnsupportedHLECalls.emplace(logMessage);
	}

	hCPU->gpr[3] = 0;
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// Table of registered HLE handlers, indexed by HLEIDX. Null until PPCInterpreter_registerHLECall() allocates it on first use
std::vector<void(*)(PPCInterpreter_t* hCPU)>* sPPCHLETable{};
|
||||
|
||||
// Registers an HLE handler and returns its table index.
// The table is allocated on first use. Registering a handler that is already present
// returns the existing index instead of adding a duplicate.
HLEIDX PPCInterpreter_registerHLECall(HLECALL hleCall)
{
	if (sPPCHLETable == nullptr)
		sPPCHLETable = new std::vector<void(*)(PPCInterpreter_t* hCPU)>();

	auto& table = *sPPCHLETable;

	// reuse the slot of an already registered handler
	sint32 slot = 0;
	while (slot < table.size())
	{
		if (table[slot] == hleCall)
			return slot;
		slot++;
	}

	// append as new entry
	HLEIDX newFuncIndex = (sint32)table.size();
	table.push_back(hleCall);
	return newFuncIndex;
}
|
||||
|
||||
// Returns the HLE handler registered under funcIndex, or nullptr if the index is
// negative, out of range, or no handler has been registered yet.
HLECALL PPCInterpreter_getHLECall(HLEIDX funcIndex)
{
	// the table is only allocated by the first registration
	if (!sPPCHLETable)
		return nullptr;
	if (funcIndex < 0 || (size_t)funcIndex >= sPPCHLETable->size())
		return nullptr;
	return sPPCHLETable->data()[funcIndex];
}
|
||||
|
||||
// Held by PPCInterpreter_virtualHLE() while it calls PPCInterpreter_handleUnsupportedHLECall()
std::mutex g_hleLogMutex;
|
||||
|
||||
// Dispatches an HLE opcode. The low 16 bits of the opcode select the handler:
// 0xFFD0 is the marker for an unsupported library call, every other value is an
// index into sPPCHLETable.
void PPCInterpreter_virtualHLE(PPCInterpreter_t* hCPU, unsigned int opcode)
{
	uint32 hleFuncId = opcode & 0xFFFF;
	if (hleFuncId == 0xFFD0)
	{
		// scoped lock so the mutex is released even if the handler throws (formatting/logging/allocation)
		std::lock_guard<std::mutex> lock(g_hleLogMutex);
		PPCInterpreter_handleUnsupportedHLECall(hCPU);
		return;
	}
	else
	{
		// os lib function
		cemu_assert(hleFuncId < sPPCHLETable->size());
		auto hleCall = (*sPPCHLETable)[hleFuncId];
		cemu_assert(hleCall);
		hleCall(hCPU);
	}
}
|
189
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterHelper.h
Normal file
189
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterHelper.h
Normal file
|
@ -0,0 +1,189 @@
|
|||
|
||||
// AND masks that clear a single 4-bit field of a 32-bit value: entry i clears bits (4*i)..(4*i+3)
static uint32 ppc_cmp_and_mask[8] = {
	0xfffffff0, 0xffffff0f, 0xfffff0ff, 0xffff0fff,
	0xfff0ffff, 0xff0fffff, 0xf0ffffff, 0x0fffffff,
};
|
||||
|
||||
|
||||
// Rotates _data left via _rotl; the rotate count is reduced to its low 5 bits (0..31)
#define ppc_word_rotl(_data, _n) (_rotl(_data,(_n)&0x1F))
|
||||
|
||||
// Builds a 32-bit mask from big-endian bit positions MB and ME (bit 0 = most significant).
// For MB <= ME the mask covers bits MB..ME; otherwise it wraps around and covers
// bits 0..ME together with bits MB..31.
static inline uint32 ppc_mask(int MB, int ME)
{
	const uint32 fromBegin = 0xFFFFFFFF >> MB;       // bits MB..31
	const uint32 uptoEnd = 0xFFFFFFFF << (31-ME);    // bits 0..ME
	if (MB <= ME)
		return fromBegin & uptoEnd;
	return fromBegin | uptoEnd;
}
|
||||
|
||||
// Returns true if the unsigned 32-bit sum a + b wraps around, or if the
// wrapped sum a + b + c is smaller than c.
static inline bool ppc_carry_3(uint32 a, uint32 b, uint32 c)
{
	const bool firstAddWraps = ((a+b) < a);
	if (firstAddWraps)
		return true;
	return ((a+b+c) < c);
}
|
||||
|
||||
// Extracts __bitCount bits from a 32-bit value; __index is the big-endian position
// (bit 0 = most significant) of the lowest extracted bit.
// Every argument is parenthesized so that compound expressions expand correctly.
#define PPC_getBits(__value, __index, __bitCount) (((__value)>>(31-(__index)))&((1<<(__bitCount))-1))
|
||||
|
||||
// Scale factors applied by dequantize(), indexed by its `scale` argument.
// Entries 0..31 are 2^-i, entries 32..63 are 2^(64-i).
// All values are exact powers of two, written as hexadecimal float literals; the earlier
// decimal constants were rounded to six digits, which made the smallest factors 0.
const static float LD_SCALE[] = {
	0x1p0f,   0x1p-1f,  0x1p-2f,  0x1p-3f,  0x1p-4f,  0x1p-5f,  0x1p-6f,  0x1p-7f,
	0x1p-8f,  0x1p-9f,  0x1p-10f, 0x1p-11f, 0x1p-12f, 0x1p-13f, 0x1p-14f, 0x1p-15f,
	0x1p-16f, 0x1p-17f, 0x1p-18f, 0x1p-19f, 0x1p-20f, 0x1p-21f, 0x1p-22f, 0x1p-23f,
	0x1p-24f, 0x1p-25f, 0x1p-26f, 0x1p-27f, 0x1p-28f, 0x1p-29f, 0x1p-30f, 0x1p-31f,
	0x1p32f,  0x1p31f,  0x1p30f,  0x1p29f,  0x1p28f,  0x1p27f,  0x1p26f,  0x1p25f,
	0x1p24f,  0x1p23f,  0x1p22f,  0x1p21f,  0x1p20f,  0x1p19f,  0x1p18f,  0x1p17f,
	0x1p16f,  0x1p15f,  0x1p14f,  0x1p13f,  0x1p12f,  0x1p11f,  0x1p10f,  0x1p9f,
	0x1p8f,   0x1p7f,   0x1p6f,   0x1p5f,   0x1p4f,   0x1p3f,   0x1p2f,   0x1p1f };
|
||||
|
||||
// Scale factors applied by quantize(), indexed by its `scale` argument.
// Entries 0..31 are 2^i, entries 32..63 are 2^(i-64).
// All values are exact powers of two, written as hexadecimal float literals; the earlier
// decimal constants were rounded to six digits, which made the smallest factors 0.
const static float ST_SCALE[] = {
	0x1p0f,   0x1p1f,   0x1p2f,   0x1p3f,
	0x1p4f,   0x1p5f,   0x1p6f,   0x1p7f,
	0x1p8f,   0x1p9f,   0x1p10f,  0x1p11f,
	0x1p12f,  0x1p13f,  0x1p14f,  0x1p15f,
	0x1p16f,  0x1p17f,  0x1p18f,  0x1p19f,
	0x1p20f,  0x1p21f,  0x1p22f,  0x1p23f,
	0x1p24f,  0x1p25f,  0x1p26f,  0x1p27f,
	0x1p28f,  0x1p29f,  0x1p30f,  0x1p31f,
	0x1p-32f, 0x1p-31f, 0x1p-30f, 0x1p-29f,
	0x1p-28f, 0x1p-27f, 0x1p-26f, 0x1p-25f,
	0x1p-24f, 0x1p-23f, 0x1p-22f, 0x1p-21f,
	0x1p-20f, 0x1p-19f, 0x1p-18f, 0x1p-17f,
	0x1p-16f, 0x1p-15f, 0x1p-14f, 0x1p-13f,
	0x1p-12f, 0x1p-11f, 0x1p-10f, 0x1p-9f,
	0x1p-8f,  0x1p-7f,  0x1p-6f,  0x1p-5f,
	0x1p-4f,  0x1p-3f,  0x1p-2f,  0x1p-1f };
|
||||
|
||||
// Converts a loaded raw value to float according to the quantization type.
// Integer types (4 = u8, 5 = u16, 6 = s8, 7 = s16) are converted and multiplied by
// LD_SCALE[scale]. Every other type (including 0) reinterprets the 32 bits as a float;
// scale does not apply when loading floats.
static float dequantize(uint32 data, sint32 type, uint8 scale)
{
	float converted;
	switch (type)
	{
	case 4: // u8
		converted = (float)(uint8)data;
		break;
	case 5: // u16
		converted = (float)(uint16)data;
		break;
	case 6: // s8
		converted = (float)(sint8)data;
		break;
	case 7: // s16
		converted = (float)(sint16)data;
		break;
	case 0: // float
	default:
		return *((float *)&data);
	}
	converted *= LD_SCALE[scale];
	return converted;
}
|
||||
|
||||
// Converts a float to the raw value stored for the given quantization type.
// Integer types (4 = u8, 5 = u16, 6 = s8, 7 = s16) are multiplied by ST_SCALE[scale],
// clamped to the range of the target type and truncated toward zero. Signed results are
// returned sign-extended to 32 bits. Every other type (including 0) returns the raw bits
// of the float; scale does not apply when storing floats.
static uint32 quantize(float data, sint32 type, uint8 scale)
{
	uint32 val;

	switch (type)
	{
	case 4: // u8
		data *= ST_SCALE[scale];
		if (data < 0) data = 0;
		if (data > 255) data = 255;
		val = (uint8)(uint32)data;
		break;
	case 5: // u16
		data *= ST_SCALE[scale];
		if (data < 0) data = 0;
		if (data > 65535) data = 65535;
		val = (uint16)(uint32)data;
		break;
	case 6: // s8
		data *= ST_SCALE[scale];
		if (data < -128) data = -128;
		if (data > 127) data = 127;
		// convert through a signed type: casting a negative float to an unsigned
		// integer is undefined behavior and yields 0 on some hosts
		val = (uint32)(sint32)(sint8)(sint32)data;
		break;
	case 7: // s16
		data *= ST_SCALE[scale];
		if (data < -32768) data = -32768;
		if (data > 32767) data = 32767;
		// see s8: the conversion must go through a signed type
		val = (uint32)(sint32)(sint16)(sint32)data;
		break;
	case 0: // float
	default:
		// scale does not apply when storing floats
		*((float*)&val) = data;
		break;
	}
	return val;
}
|
||||
|
||||
// Sign-extends __v from bit index __bits (0 = LSB) to 32 bits using a left shift followed by an arithmetic right shift.
// NOTE(review): the expansion ends with a ';', so the macro can only be used as the last part of a statement, not inside a larger expression.
#define _uint32_fastSignExtend(__v, __bits) (uint32)(((sint32)(__v)<<(31-(__bits)))>>(31-(__bits)));
|
||||
|
||||
// Converts the raw bits of a single precision float to the raw bits of the equivalent
// double using integer operations only, so single precision denormals are normalized
// rather than flushed to zero.
// http://www.freescale.com/files/product/doc/MPCFPE32B.pdf
static inline uint64 ConvertToDoubleNoFTZ(uint32 value)
{
	const uint64 raw = value;
	const uint64 singleExponent = (raw >> 23) & 0xff;
	uint64 mantissa = raw & 0x007fffff;

	if (singleExponent == 0 && mantissa != 0) // denormal
	{
		// shift the mantissa up until its implicit leading one appears, adjusting the exponent for every step
		uint64 doubleExponent = 1023 - 126;
		do
		{
			mantissa <<= 1;
			doubleExponent -= 1;
		} while ((mantissa & 0x00800000) == 0);

		return ((raw & 0x80000000) << 32) | (doubleExponent << 52) | ((mantissa & 0x007fffff) << 29);
	}

	// Remaining cases copy the sign bit, the top exponent bit and the low 30 bits directly.
	// The three additional exponent bits of the double are filled with:
	//   normal numbers         -> inverse of the top exponent bit
	//   QNaN, SNaN, INF, zero  -> the top exponent bit itself
	uint64 fill;
	if (singleExponent > 0 && singleExponent < 255)
		fill = !(singleExponent >> 7);
	else
		fill = singleExponent >> 7;
	const uint64 fillBits = fill << 61 | fill << 60 | fill << 59;

	return ((raw & 0xc0000000) << 32) | fillBits | ((raw & 0x3fffffff) << 29);
}
|
||||
|
||||
// Converts the raw bits of a double to single precision raw bits using integer operations only.
// Exponents 874..896 produce a single precision denormal by shifting the mantissa
// (with its implicit leading one) right. All other values copy the sign bit, the top
// exponent bit and bits 29..58 of the double unchanged.
static inline uint32 ConvertToSingleNoFTZ(uint64 x)
{
	const uint32 doubleExponent = (x >> 52) & 0x7ff;

	const bool producesDenormal = (doubleExponent >= 874 && doubleExponent <= 896);
	if (producesDenormal)
	{
		uint32 shifted = (uint32)(0x80000000 | ((x & 0x000FFFFFFFFFFFFFULL) >> 21));
		shifted = shifted >> (905 - doubleExponent);
		shifted |= (x >> 32) & 0x80000000;
		return shifted;
	}

	return ((x >> 32) & 0xc0000000) | ((x >> 29) & 0x3fffffff);
}
|
1231
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterImpl.cpp
Normal file
1231
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterImpl.cpp
Normal file
File diff suppressed because it is too large
Load diff
300
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h
Normal file
300
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h
Normal file
|
@ -0,0 +1,300 @@
|
|||
#pragma once
|
||||
|
||||
#include "Cafe/HW/Espresso/PPCState.h"
|
||||
|
||||
// SPR constants
|
||||
#define SPR_XER 1
|
||||
#define SPR_LR 8
|
||||
#define SPR_CTR 9
|
||||
#define SPR_DEC 22
|
||||
#define SPR_SRR0 26
|
||||
#define SPR_SRR1 27
|
||||
#define SPR_HID0 1008
|
||||
#define SPR_HID1 1009
|
||||
#define SPR_HID2 920
|
||||
#define SPR_TBL 268
|
||||
#define SPR_TBU 269
|
||||
#define SPR_DMAU 922
|
||||
#define SPR_DMAL 923
|
||||
|
||||
// graphics quantization registers
|
||||
#define SPR_GQR0 912
|
||||
#define SPR_GQR1 913
|
||||
#define SPR_GQR2 914
|
||||
#define SPR_GQR3 915
|
||||
#define SPR_GQR4 916
|
||||
#define SPR_GQR5 917
|
||||
#define SPR_GQR6 918
|
||||
#define SPR_GQR7 919
|
||||
|
||||
// user graphics quantization registers
|
||||
#define SPR_UGQR0 896
|
||||
#define SPR_UGQR1 897
|
||||
#define SPR_UGQR2 898
|
||||
#define SPR_UGQR3 899
|
||||
#define SPR_UGQR4 900
|
||||
#define SPR_UGQR5 901
|
||||
#define SPR_UGQR6 902
|
||||
#define SPR_UGQR7 903
|
||||
|
||||
#define SPR_FPECR 1022 // used by the OS to store values
|
||||
|
||||
#define SPR_PVR 287 // processor version, for Wii U this must be 0x7001xxxx - this register is only readable
|
||||
#define SPR_UPIR 1007 // core index
|
||||
#define SPR_SCR 947 // core control
|
||||
#define SPR_SDR1 25
|
||||
|
||||
// reversed CR bit indices
|
||||
#define CR_BIT_LT 0
|
||||
#define CR_BIT_GT 1
|
||||
#define CR_BIT_EQ 2
|
||||
#define CR_BIT_SO 3
|
||||
|
||||
#define XER_SO (1<<31) // summary overflow bit
|
||||
#define XER_OV (1<<30) // overflow bit
|
||||
#define XER_BIT_CA (29) // carry bit index. To accelerate frequent access, this bit is stored as a separate uint8
|
||||
|
||||
// FPSCR
|
||||
#define FPSCR_VXSNAN (1<<24)
|
||||
#define FPSCR_VXVC (1<<19)
|
||||
|
||||
// Machine state register (MSR) bit masks
#define MSR_SF (1<<31)
#define MSR_UNKNOWN (1<<30)
#define MSR_UNKNOWN2 (1<<27)
#define MSR_VEC (1<<25)
#define MSR_POW (1<<18)
#define MSR_TGPR (1<<17) // MSR bit 14 in big-endian numbering; previously defined with the same value as MSR_EE
#define MSR_ILE (1<<16)
#define MSR_EE (1<<15)
#define MSR_PR (1<<14)
#define MSR_FP (1<<13)
#define MSR_ME (1<<12)
#define MSR_FE0 (1<<11)
#define MSR_SE (1<<10)
#define MSR_BE (1<<9)
#define MSR_FE1 (1<<8)
#define MSR_IP (1<<6)
#define MSR_IR (1<<5)
#define MSR_DR (1<<4)
#define MSR_PM (1<<2)
#define MSR_RI (1<<1)
#define MSR_LE (1)
|
||||
|
||||
// helpers
|
||||
|
||||
#define GET_MSR_BIT(__bit) ((hCPU->sprExtended.msr&(__bit))!=0)
|
||||
|
||||
#define opHasRC() ((opcode & PPC_OPC_RC) != 0)
|
||||
|
||||
// assume fixed values for PSE/LSQE. This optimization is possible because Wii U applications run only in user mode (todo - handle this correctly in LLE mode)
|
||||
//#define PPC_LSQE (hCPU->LSQE)
|
||||
//#define PPC_PSE (hCPU->PSE)
|
||||
|
||||
#define PPC_LSQE (1)
|
||||
#define PPC_PSE (1)
|
||||
|
||||
// Expands to nothing: the asserted expression is never evaluated or checked
#define PPC_ASSERT(v)
|
||||
|
||||
#define PPC_OPC_RC 1
|
||||
#define PPC_OPC_OE (1<<10)
|
||||
#define PPC_OPC_LK 1
|
||||
#define PPC_OPC_AA (1<<1)
|
||||
|
||||
#define PPC_OPC_TEMPL_A(opc, rD, rA, rB, rC) {rD=((opc)>>21)&0x1f;rA=((opc)>>16)&0x1f;rB=((opc)>>11)&0x1f;rC=((opc)>>6)&0x1f;}
|
||||
#define PPC_OPC_TEMPL_B(opc, BO, BI, BD) {BO=((opc)>>21)&0x1f;BI=((opc)>>16)&0x1f;BD=(uint32)(sint32)(sint16)((opc)&0xfffc);}
|
||||
#define PPC_OPC_TEMPL_D_SImm(opc, rD, rA, imm) {rD=((opc)>>21)&0x1f;rA=((opc)>>16)&0x1f;imm=(uint32)(sint32)(sint16)((opc)&0xffff);}
|
||||
#define PPC_OPC_TEMPL_D_UImm(opc, rD, rA, imm) {rD=((opc)>>21)&0x1f;rA=((opc)>>16)&0x1f;imm=(opc)&0xffff;}
|
||||
#define PPC_OPC_TEMPL_D_Shift16(opc, rD, rA, imm) {rD=((opc)>>21)&0x1f;rA=((opc)>>16)&0x1f;imm=(opc)<<16;}
|
||||
#define PPC_OPC_TEMPL_I(opc, LI) {LI=(opc)&0x3fffffc;if (LI&0x02000000) LI |= 0xfc000000;}
|
||||
#define PPC_OPC_TEMPL_M(opc, rS, rA, SH, MB, ME) {rS=((opc)>>21)&0x1f;rA=((opc)>>16)&0x1f;SH=((opc)>>11)&0x1f;MB=((opc)>>6)&0x1f;ME=((opc)>>1)&0x1f;}
|
||||
#define PPC_OPC_TEMPL_X(opc, rS, rA, rB) {rS=((opc)>>21)&0x1f;rA=((opc)>>16)&0x1f;rB=((opc)>>11)&0x1f;}
|
||||
#define PPC_OPC_TEMPL_XFX(opc, rS, CRM) {rS=((opc)>>21)&0x1f;CRM=((opc)>>12)&0xff;}
|
||||
#define PPC_OPC_TEMPL_XO(opc, rS, rA, rB) {rS=((opc)>>21)&0x1f;rA=((opc)>>16)&0x1f;rB=((opc)>>11)&0x1f;}
|
||||
#define PPC_OPC_TEMPL_XL(opc, BO, BI, BD) {BO=((opc)>>21)&0x1f;BI=((opc)>>16)&0x1f;BD=((opc)>>11)&0x1f;}
|
||||
#define PPC_OPC_TEMPL_XFL(opc, rB, FM) {rB=((opc)>>11)&0x1f;FM=((opc)>>17)&0xff;}
|
||||
|
||||
#define PPC_OPC_TEMPL3_XO() sint32 rD, rA, rB; rD=((opcode)>>21)&0x1f;rA=((opcode)>>16)&0x1f;rB=((opcode)>>11)&0x1f
|
||||
#define PPC_OPC_TEMPL_X_CR() sint32 crD, crA, crB; crD=((opcode)>>21)&0x1f;crA=((opcode)>>16)&0x1f;crB=((opcode)>>11)&0x1f
|
||||
|
||||
// Updates the four CR0 bits (LT, GT, EQ, SO) from the 32-bit result r.
// r is interpreted as a signed value: LT when the sign bit is set, EQ when r is zero,
// GT otherwise. SO mirrors the summary overflow bit of XER.
static inline void ppc_update_cr0(PPCInterpreter_t* hCPU, uint32 r)
{
	const bool isZero = (r == 0);
	const bool isNegative = (r & 0x80000000) != 0; // a set sign bit already implies r != 0

	hCPU->cr[CR_BIT_LT] = isNegative ? 1 : 0;
	hCPU->cr[CR_BIT_GT] = (!isZero && !isNegative) ? 1 : 0;
	hCPU->cr[CR_BIT_EQ] = isZero ? 1 : 0;
	hCPU->cr[CR_BIT_SO] = ((hCPU->spr.XER & XER_SO) != 0) ? 1 : 0;
}
|
||||
|
||||
// Returns the CR bit stored at array index r (index 0 holds cr0.LT).
static inline uint8 ppc_getCRBit(PPCInterpreter_t* hCPU, uint32 r)
{
	const uint8 bitValue = hCPU->cr[r];
	return bitValue;
}
|
||||
|
||||
// Tests whether the 8-bit MTCRF field mask selects CR field crIndex (0..7).
// The mask is MSB-first: 0x80 selects cr0 and 0x01 selects cr7.
static inline bool ppc_MTCRFMaskHasCRFieldSet(const uint32 mtcrfMask, const uint32 crIndex)
{
	const uint32 fieldBit = 1 << (7 - crIndex);
	return (mtcrfMask & fieldBit) != 0;
}
|
||||
|
||||
// returns CR mask with CR0.LT in LSB
|
||||
static inline uint32 ppc_MTCRFMaskToCRBitMask(const uint32 mtcrfMask)
|
||||
{
|
||||
uint32 crMask = 0;
|
||||
for (uint32 crF = 0; crF < 8; crF++)
|
||||
{
|
||||
if (ppc_MTCRFMaskHasCRFieldSet(mtcrfMask, crF))
|
||||
crMask |= (0xF << (crF * 4));
|
||||
}
|
||||
return crMask;
|
||||
}
|
||||
|
||||
// Stores v into the CR bit at array index r (index 0 holds cr0.LT).
static inline void ppc_setCRBit(PPCInterpreter_t* hCPU, uint32 r, uint8 v)
{
	auto& crSlot = hCPU->cr[r];
	crSlot = v;
}
|
||||
|
||||
// Unpacks a 32-bit CR register image into the per-bit cr array.
// The most significant bit of cr goes to array index 0, the least significant to index 31.
static inline void ppc_setCR(PPCInterpreter_t* hCPU, uint32 cr)
{
	for (uint32 bitIndex = 0; bitIndex < 32; bitIndex++)
	{
		const uint32 bitValue = (cr >> (31 - bitIndex)) & 1;
		ppc_setCRBit(hCPU, bitIndex, bitValue);
	}
}
|
||||
|
||||
// Packs the per-bit cr array into a 32-bit CR register image.
// Array index 0 becomes the most significant bit; any non-zero entry counts as set.
static inline uint32 ppc_getCR(PPCInterpreter_t* hCPU)
{
	uint32 packed = 0;
	for (uint32 bitIndex = 0; bitIndex < 32; bitIndex++)
	{
		if (ppc_getCRBit(hCPU, bitIndex))
			packed |= (0x80000000u >> bitIndex);
	}
	return packed;
}
|
||||
|
||||
// FPU helper
|
||||
|
||||
#define IS_NAN(X) ((((X) & 0x000fffffffffffffULL) != 0) && (((X) & 0x7ff0000000000000ULL) == 0x7ff0000000000000ULL))
|
||||
#define IS_QNAN(X) ((((X) & 0x000fffffffffffffULL) != 0) && (((X) & 0x7ff8000000000000ULL) == 0x7ff8000000000000ULL))
|
||||
#define IS_SNAN(X) ((((X) & 0x000fffffffffffffULL) != 0) && (((X) & 0x7ff8000000000000ULL) == 0x7ff0000000000000ULL))
|
||||
|
||||
#define FPSCR_VE (1 << 7)
|
||||
|
||||
// Reduces the precision of a double by operating on its raw 64-bit pattern:
// the low 27 bits are cleared and bit 27 is added back onto the remainder, which rounds
// the value upward at that position (the add may carry into the higher bits).
// The bit pattern is transferred with memcpy instead of pointer casts, because
// dereferencing a double through a uint64* violates strict aliasing (undefined behavior).
inline double roundTo25BitAccuracy(double d)
{
	static_assert(sizeof(uint64) == sizeof(double), "double must be 64 bits wide");
	uint64 v;
	memcpy(&v, &d, sizeof(v));
	v = (v & 0xFFFFFFFFF8000000ULL) + (v & 0x8000000ULL);
	double rounded;
	memcpy(&rounded, &v, sizeof(rounded));
	return rounded;
}
|
||||
|
||||
double fres_espresso(double input);
|
||||
double frsqrte_espresso(double input);
|
||||
|
||||
void fcmpu_espresso(PPCInterpreter_t* hCPU, int crfD, double a, double b);
|
||||
|
||||
// OPC
|
||||
void PPCInterpreter_virtualHLE(PPCInterpreter_t* hCPU, unsigned int opcode);
|
||||
|
||||
void PPCInterpreter_MFMSR(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_MTMSR(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_MFTB(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_MTFSB1X(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_MFCR(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_MCRF(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_MTCRF(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_MCRXR(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
|
||||
void PPCInterpreter_TLBIE(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_TLBSYNC(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
|
||||
void PPCInterpreter_DCBT(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_DCBST(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_DCBZL(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_DCBF(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_DCBI(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_DCBZ(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_ICBI(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_EIEIO(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
|
||||
void PPCInterpreter_SC(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_SYNC(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_ISYNC(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_RFI(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
|
||||
void PPCInterpreter_BX(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_BCX(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_BCLRX(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_BCCTR(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
|
||||
// FPU
|
||||
|
||||
void PPCInterpreter_FCMPO(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FCMPU(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
|
||||
void PPCInterpreter_FMR(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FSEL(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FCTIWZ(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FCTIW(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FNEG(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FRSP(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FRSQRTE(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FRES(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
|
||||
void PPCInterpreter_FABS(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FNABS(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FADD(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FMUL(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FDIV(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FSUB(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FMADD(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FMSUB(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FMSUBS(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FNMADD(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FNMSUB(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
|
||||
void PPCInterpreter_MFFS(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_MTFSF(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
|
||||
void PPCInterpreter_FDIVS(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FMULS(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FADDS(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FSUBS(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FMADDS(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FNMADDS(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_FNMSUBS(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
|
||||
void PPCInterpreter_PS_MERGE00(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_MERGE01(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_MERGE10(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_MERGE11(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_MR(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_NEG(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_ABS(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_NABS(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_RES(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_RSQRTE(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
|
||||
void PPCInterpreter_PS_ADD(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_SUB(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_MUL(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_DIV(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
|
||||
void PPCInterpreter_PS_MADD(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_NMADD(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_MADDS0(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_MADDS1(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_MSUB(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_NMSUB(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
|
||||
void PPCInterpreter_PS_SEL(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_SUM0(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_SUM1(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_MULS0(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_MULS1(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
|
||||
void PPCInterpreter_PS_CMPO0(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_CMPU0(PPCInterpreter_t* hCPU, uint32 Opcode);
|
||||
void PPCInterpreter_PS_CMPU1(PPCInterpreter_t* hCPU, uint32 Opcode);
|
1097
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp
Normal file
1097
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp
Normal file
File diff suppressed because it is too large
Load diff
107
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp
Normal file
107
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp
Normal file
|
@ -0,0 +1,107 @@
|
|||
#include "PPCInterpreterInternal.h"
|
||||
#include "Cafe/OS/RPL/rpl.h"
|
||||
#include "Cafe/GameProfile/GameProfile.h"
|
||||
#include "Cafe/HW/Espresso/Debugger/Debugger.h"
|
||||
|
||||
thread_local PPCInterpreter_t* ppcInterpreterCurrentInstance;
|
||||
|
||||
// main thread instruction counter and timing
|
||||
volatile uint64 ppcMainThreadCycleCounter = 0;
|
||||
uint64 ppcMainThreadDECCycleValue = 0; // value that was set to dec register
|
||||
uint64 ppcMainThreadDECCycleStart = 0; // at which cycle the dec register was set, if == 0 -> dec is 0
|
||||
uint64 ppcCyclesSince2000 = 0;
|
||||
uint64 ppcCyclesSince2000TimerClock = 0;
|
||||
uint64 ppcCyclesSince2000_UTC = 0;
|
||||
|
||||
// Allocates and zero-initializes a new interpreter instance whose instruction pointer
// is set to Entrypoint.
// The struct is placed prefixAreaSize bytes into the allocation, so the returned pointer
// is NOT the pointer returned by malloc and cannot be handed to free() as-is.
// NOTE(review): the malloc result is not checked for nullptr.
PPCInterpreter_t* PPCInterpreter_createInstance(unsigned int Entrypoint)
{
	// bytes in front of the interpreter struct are used by the recompiler as stack area
	// (specifically when the exception handler is called)
	const uint32 prefixAreaSize = 0x6000;
	uint8* const allocationBase = (uint8*)malloc(sizeof(PPCInterpreter_t) + prefixAreaSize);
	PPCInterpreter_t* const instance = (PPCInterpreter_t*)(allocationBase + prefixAreaSize);
	memset((void*)instance, 0x00, sizeof(PPCInterpreter_t));
	// execution starts at the entrypoint
	instance->instructionPointer = (uint32)Entrypoint;
	// initial register values
	instance->gpr[GPR_SP] = 0x00000000;
	instance->spr.LR = 0;
	return instance;
}
|
||||
|
||||
// Returns the interpreter instance bound to the calling thread (thread_local storage).
PPCInterpreter_t* PPCInterpreter_getCurrentInstance()
{
	PPCInterpreter_t* const threadInstance = ppcInterpreterCurrentInstance;
	return threadInstance;
}
|
||||
|
||||
// Returns the current cycle counter value as reported by PPCTimer_getFromRDTSC().
__declspec(noinline) uint64 PPCInterpreter_getMainCoreCycleCounter()
{
	const uint64 cycleCounter = PPCTimer_getFromRDTSC();
	return cycleCounter;
}
|
||||
|
||||
// Advances the instruction pointer by 4 bytes, i.e. to the next instruction.
void PPCInterpreter_nextInstruction(PPCInterpreter_t* cpuInterpreter)
{
	cpuInterpreter->instructionPointer = cpuInterpreter->instructionPointer + 4;
}
|
||||
|
||||
// Sets the instruction pointer to newIP.
void PPCInterpreter_jumpToInstruction(PPCInterpreter_t* cpuInterpreter, uint32 newIP)
{
	const uint32 targetAddress = newIP;
	cpuInterpreter->instructionPointer = targetAddress;
}
|
||||
|
||||
// Writes newValue to the DEC register and records, in the global DEC bookkeeping
// variables, both the written value and the cycle counter at the time of the write.
void PPCInterpreter_setDEC(PPCInterpreter_t* hCPU, uint32 newValue)
{
	hCPU->sprExtended.DEC = newValue;
	const uint64 cycleAtWrite = PPCInterpreter_getMainCoreCycleCounter();
	ppcMainThreadDECCycleStart = cycleAtWrite;
	ppcMainThreadDECCycleValue = newValue;
}
|
||||
|
||||
// Assembles the full XER value. The carry bit is kept separately in hCPU->xer_ca,
// so the CA position of the stored XER is discarded and replaced by that flag.
uint32 PPCInterpreter_getXER(PPCInterpreter_t* hCPU)
{
	const uint32 carryMask = (1<<XER_BIT_CA);
	const uint32 xerWithoutCarry = hCPU->spr.XER & ~carryMask;
	if (hCPU->xer_ca)
		return xerWithoutCarry | carryMask;
	return xerWithoutCarry;
}
|
||||
|
||||
// Stores v as the XER value and mirrors its carry bit into the separate xer_ca flag.
void PPCInterpreter_setXER(PPCInterpreter_t* hCPU, uint32 v)
{
	const uint32 carryBit = (v >> XER_BIT_CA) & 1;
	hCPU->spr.XER = v;
	hCPU->xer_ca = carryBit;
}
|
||||
|
||||
// Returns the core index of the given instance, read from its UPIR register.
uint32 PPCInterpreter_getCoreIndex(PPCInterpreter_t* hCPU)
{
	const uint32 coreIndex = hCPU->spr.UPIR;
	return coreIndex;
}
|
||||
|
||||
// Returns the core index (UPIR) of the calling thread's interpreter instance.
// The thread-local instance pointer is dereferenced without a null check.
uint32 PPCInterpreter_getCurrentCoreIndex()
{
	PPCInterpreter_t* const threadInstance = ppcInterpreterCurrentInstance;
	return threadInstance->spr.UPIR;
}
|
||||
|
||||
// Translates the current thread's guest stack pointer (r1) into a host pointer.
uint8* PPCInterpreterGetStackPointer()
{
	const auto guestStackPointer = ppcInterpreterCurrentInstance->gpr[1];
	return memory_getPointerFromVirtualOffset(guestStackPointer);
}
|
||||
|
||||
// Lowers the current thread's guest stack pointer (r1) by offset and returns a host
// pointer to the new stack location. A negative offset raises the stack pointer.
uint8* PPCInterpreterGetAndModifyStackPointer(sint32 offset)
{
	PPCInterpreter_t* const threadInstance = ppcInterpreterCurrentInstance;
	const auto loweredStackPointer = threadInstance->gpr[1] - offset;
	uint8* const hostPointer = memory_getPointerFromVirtualOffset(loweredStackPointer);
	threadInstance->gpr[1] = loweredStackPointer;
	return hostPointer;
}
|
||||
|
||||
// Lowers the current thread's guest stack pointer (r1) by offset.
void PPCInterpreterModifyStackPointer(sint32 offset)
{
	PPCInterpreter_t* const threadInstance = ppcInterpreterCurrentInstance;
	threadInstance->gpr[1] = threadInstance->gpr[1] - offset;
}
|
||||
|
||||
uint32 RPLLoader_MakePPCCallable(void(*ppcCallableExport)(PPCInterpreter_t* hCPU));
|
||||
|
||||
// deprecated wrapper, use RPLLoader_MakePPCCallable directly
|
||||
uint32 PPCInterpreter_makeCallableExportDepr(void (*ppcCallableExport)(PPCInterpreter_t* hCPU))
|
||||
{
|
||||
return RPLLoader_MakePPCCallable(ppcCallableExport);
|
||||
}
|
360
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp
Normal file
360
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp
Normal file
|
@ -0,0 +1,360 @@
|
|||
#include "../PPCState.h"
|
||||
#include "PPCInterpreterInternal.h"
|
||||
#include "PPCInterpreterHelper.h"
|
||||
|
||||
#include "Cafe/OS/libs/coreinit/coreinit_CodeGen.h"
|
||||
|
||||
#include "../Recompiler/PPCRecompiler.h"
|
||||
#include "../Recompiler/PPCRecompilerX64.h"
|
||||
|
||||
#include <float.h>
|
||||
#include "Cafe/HW/Latte/Core/LatteBufferCache.h"
|
||||
|
||||
// mfmsr: copies the MSR into GPR rD.
// When MSR[PR] is set the function returns early without touching rD and without
// advancing the instruction pointer (no exception is raised here).
void PPCInterpreter_MFMSR(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	forceLogDebug_printf("Rare instruction: MFMSR");
	const bool problemStateActive = (hCPU->sprExtended.msr & MSR_PR) != 0;
	if (problemStateActive)
	{
		PPC_ASSERT(true);
		return;
	}
	const uint32 rD = (Opcode >> 21) & 0x1f;
	hCPU->gpr[rD] = hCPU->sprExtended.msr;
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// mtmsr: copies GPR rS into the MSR.
// When MSR[PR] is set the function returns early without writing the MSR and without
// advancing the instruction pointer (no exception is raised here).
void PPCInterpreter_MTMSR(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	forceLogDebug_printf("Rare instruction: MTMSR");
	const bool problemStateActive = (hCPU->sprExtended.msr & MSR_PR) != 0;
	if (problemStateActive)
	{
		PPC_ASSERT(true);
		return;
	}
	const uint32 rS = (Opcode >> 21) & 0x1f;
	hCPU->sprExtended.msr = hCPU->gpr[rS];
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// mtfsb1[.]: sets FPSCR bit crbD (bit 0 = MSB). Bits 1 and 2 are left untouched.
// The record form (Rc=1) does not update CR1 here.
void PPCInterpreter_MTFSB1X(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	forceLogDebug_printf("Rare instruction: MTFSB1X");
	const int crbD = (Opcode >> 21) & 0x1f;
	const bool bitIsSettable = !(crbD == 1 || crbD == 2);
	if (bitIsSettable)
		hCPU->fpscr |= 1 << (31 - crbD);
	if ((Opcode & PPC_OPC_RC) != 0)
	{
		// update cr1 flags
		PPC_ASSERT(true);
	}
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// mcrf: copies the four bits of CR field crS into CR field crD.
void PPCInterpreter_MCRF(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	// the 3-bit field numbers are the upper bits of the 5-bit operand slots
	const uint32 dstFieldBase = (((Opcode >> 21) & 0x1f) >> 2) * 4;
	const uint32 srcFieldBase = (((Opcode >> 16) & 0x1f) >> 2) * 4;
	for (uint32 bit = 0; bit < 4; bit++)
	{
		const uint8 sourceBit = ppc_getCRBit(hCPU, srcFieldBase + bit);
		ppc_setCRBit(hCPU, dstFieldBase + bit, sourceBit);
	}
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// mfcr: packs all 32 CR bits into GPR rD.
// Frequently used by GCC compiled code (e.g. SM64 port).
void PPCInterpreter_MFCR(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	const uint32 rD = (Opcode >> 21) & 0x1f;
	// array index 0 (cr0.LT) maps to the MSB of the register value
	uint32 packedCR = 0;
	for (uint32 bitIndex = 0; bitIndex < 32; bitIndex++)
	{
		if (ppc_getCRBit(hCPU, bitIndex) != 0)
			packedCR |= (0x80000000u >> bitIndex);
	}
	hCPU->gpr[rD] = packedCR;
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// mtcrf: for every CR field selected by the 8-bit field mask, copies the matching
// nibble of GPR rS into that field (most significant nibble of rS belongs to cr0).
// Frequently used by GCC compiled code (e.g. SM64 port).
void PPCInterpreter_MTCRF(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	const uint32 rS = (Opcode >> 21) & 0x1f;
	const uint32 crfMask = (Opcode >> 12) & 0xff;
	const uint32 sourceValue = hCPU->gpr[rS];

	for (uint32 crIndex = 0; crIndex < 8; crIndex++)
	{
		if (ppc_MTCRFMaskHasCRFieldSet(crfMask, crIndex))
		{
			const uint32 fieldNibble = (sourceValue >> (28 - crIndex * 4)) & 0xF;
			// highest bit of the nibble goes to the first bit (LT) of the field
			for (uint32 bit = 0; bit < 4; bit++)
				ppc_setCRBit(hCPU, crIndex * 4 + bit, (fieldNibble >> (3 - bit)) & 1);
		}
	}

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// mcrxr: copies XER[0..3] (SO, OV, CA and the bit below CA) into CR field crfD and then
// clears those four XER bits.
// Used in Dont Starve: Giant Edition, also used frequently by Web Browser (webkit?).
//
// Two fixes compared to the previous implementation:
//  - crfD is a 3-bit field at opcode bits 6..8 and (Opcode >> 23) & 7 already yields the
//    field number; the extra ">> 2" reduced it to 0 or 1, so cr2..cr7 were never written.
//  - The bits are copied MSB first (XER.SO -> first bit of the field), matching the
//    layout used by MTCRF/MFCR where array index 0 of a field is the most significant bit.
void PPCInterpreter_MCRXR(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	const uint32 crfD = (Opcode >> (31 - 8)) & 7;
	const uint32 crBitBase = crfD * 4;

	const uint32 xer = PPCInterpreter_getXER(hCPU);
	const uint32 xerBits = (xer >> 28) & 0xF; // bit 3 = SO, bit 2 = OV, bit 1 = CA

	ppc_setCRBit(hCPU, crBitBase + 0, (xerBits >> 3) & 1);
	ppc_setCRBit(hCPU, crBitBase + 1, (xerBits >> 2) & 1);
	ppc_setCRBit(hCPU, crBitBase + 2, (xerBits >> 1) & 1);
	ppc_setCRBit(hCPU, crBitBase + 3, (xerBits >> 0) & 1);

	// reset copied bits
	PPCInterpreter_setXER(hCPU, xer&~0xF0000000);

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// tlbie: treated as a no-op; the operands are not evaluated and only the
// instruction pointer is advanced.
void PPCInterpreter_TLBIE(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// tlbsync: not implemented. Flags the missing implementation through
// cemu_assert_unimplemented() and then steps to the next instruction.
void PPCInterpreter_TLBSYNC(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	cemu_assert_unimplemented();
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// branch instructions
|
||||
|
||||
// b / ba / bl / bla: unconditional branch.
// The 26-bit displacement is sign-extended and, unless AA is set, added to the current
// instruction address. With LK set, LR receives the address of the following instruction
// and the recompiler is given the chance to take over at the target.
void PPCInterpreter_BX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	uint32 target = Opcode & 0x3fffffc;
	if (target & 0x02000000)
		target |= 0xfc000000; // sign-extend
	const uint32 currentIP = (unsigned int)hCPU->instructionPointer;
	if ((Opcode & PPC_OPC_AA) == 0)
		target += currentIP;

	if ((Opcode & PPC_OPC_LK) == 0)
	{
		PPCInterpreter_jumpToInstruction(hCPU, target);
		return;
	}
	// call: update LR, then IP (jumpToInstruction performs the IP store)
	hCPU->spr.LR = currentIP + 4;
	PPCInterpreter_jumpToInstruction(hCPU, target);
	PPCRecompiler_attemptEnter(hCPU, target);
}
|
||||
|
||||
|
||||
// bc / bcl: conditional branch with a 16-bit displacement.
// BO bit 4 (value 4) clear -> CTR is decremented and tested against BO bit 2.
// BO bit 16 clear -> CR bit BI is compared against BO bit 8.
// The absolute form (AA=1) is not expected and triggers cemu_assert_unimplemented().
void PPCInterpreter_BCX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	const uint32 BO = (Opcode >> 21) & 0x1f;
	const uint32 BI = (Opcode >> 16) & 0x1f;
	uint32 BD = (uint32)(sint32)(sint16)(Opcode & 0xfffc);

	const bool ignoreCTR = (BO & 4) != 0;
	if (!ignoreCTR)
		hCPU->spr.CTR--;

	const bool branchIfCtrZero = (BO & 2) != 0;
	const bool expectedCRValue = (BO & 8) != 0; // branch condition true
	const bool crValue = ppc_getCRBit(hCPU, BI) != 0;

	const bool ctrPasses = ignoreCTR || ((hCPU->spr.CTR != 0) != branchIfCtrZero);
	const bool crPasses = ((BO & 16) != 0) || (crValue == expectedCRValue);
	if (!(ctrPasses && crPasses))
	{
		PPCInterpreter_nextInstruction(hCPU);
		return;
	}

	if (Opcode & PPC_OPC_AA)
	{
		// should never happen
		cemu_assert_unimplemented();
	}
	else
	{
		BD += (unsigned int)hCPU->instructionPointer;
	}
	if (Opcode & PPC_OPC_LK)
		hCPU->spr.LR = ((unsigned int)hCPU->instructionPointer) + 4;
	PPCInterpreter_jumpToInstruction(hCPU, BD);
}
|
||||
|
||||
// bclr / bclrl: conditional branch to the address held in LR (low two bits masked off).
// Uses the same BO/BI condition evaluation as bc. When the branch is taken the
// recompiler is given the chance to take over at the target.
void PPCInterpreter_BCLRX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	const uint32 BO = (Opcode >> 21) & 0x1f;
	const uint32 BI = (Opcode >> 16) & 0x1f;

	const bool ignoreCTR = (BO & 4) != 0;
	if (!ignoreCTR)
	{
		if (hCPU->spr.CTR == 0)
		{
			PPC_ASSERT(true);
			forceLogDebug_printf("Decrementer underflow!\n");
		}
		hCPU->spr.CTR--;
	}

	const bool branchIfCtrZero = (BO & 2) != 0;
	const bool expectedCRValue = (BO & 8) != 0;
	const bool crValue = ppc_getCRBit(hCPU, BI) != 0;

	const bool ctrPasses = ignoreCTR || ((hCPU->spr.CTR != 0) != branchIfCtrZero);
	const bool crPasses = ((BO & 16) != 0) || (crValue == expectedCRValue);
	if (!(ctrPasses && crPasses))
	{
		PPCInterpreter_nextInstruction(hCPU);
		return;
	}

	// read the target before LR is possibly overwritten by the link update
	const uint32 target = hCPU->spr.LR & 0xfffffffc;
	if (Opcode & PPC_OPC_LK)
		hCPU->spr.LR = (unsigned int)hCPU->instructionPointer + 4;
	PPCInterpreter_jumpToInstruction(hCPU, target);
	PPCRecompiler_attemptEnter(hCPU, target);
}
|
||||
|
||||
// bcctr / bcctrl: conditional branch to the address held in CTR (low two bits masked off).
// Only the CR part of the BO condition is evaluated; CTR is never decremented here.
// When the branch is taken the recompiler is given the chance to take over at the target.
void PPCInterpreter_BCCTR(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	const uint32 BO = (Opcode >> 21) & 0x1f;
	const uint32 BI = (Opcode >> 16) & 0x1f;
	PPC_ASSERT(!(BO & 2));

	const bool expectedCRValue = (BO & 8) != 0;
	const bool crValue = ppc_getCRBit(hCPU, BI) != 0;
	const bool branchTaken = ((BO & 16) != 0) || (crValue == expectedCRValue);
	if (!branchTaken)
	{
		hCPU->instructionPointer += 4;
		return;
	}

	if (Opcode & PPC_OPC_LK)
		hCPU->spr.LR = (unsigned int)hCPU->instructionPointer + 4;
	hCPU->instructionPointer = (unsigned int)(hCPU->spr.CTR & 0xfffffffc);
	PPCRecompiler_attemptEnter(hCPU, hCPU->instructionPointer);
}
|
||||
|
||||
// dcbt: treated as a no-op; the operands are not evaluated and only the
// instruction pointer is advanced.
void PPCInterpreter_DCBT(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// dcbst: computes the effective address (rA|0) + rB and reports a 32-byte
// data cache flush at that address to the Latte buffer cache.
void PPCInterpreter_DCBST(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	const sint32 rA = (Opcode >> 16) & 0x1F;
	const sint32 rB = (Opcode >> 11) & 0x1F;

	uint32 ea = hCPU->gpr[rB];
	if (rA != 0)
		ea += hCPU->gpr[rA]; // rA == 0 means "no base register"

	LatteBufferCache_notifyDCFlush(ea, 32);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// dcbf: computes the effective address (rA|0) + rB and reports a 32-byte
// data cache flush at that address to the Latte buffer cache.
void PPCInterpreter_DCBF(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	const sint32 rA = (Opcode >> 16) & 0x1F;
	const sint32 rB = (Opcode >> 11) & 0x1F;

	uint32 ea = hCPU->gpr[rB];
	if (rA != 0)
		ea += hCPU->gpr[rA]; // rA == 0 means "no base register"

	LatteBufferCache_notifyDCFlush(ea, 32);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// dcbz_l (undocumented instruction): handled as a no-op, only the instruction pointer advances.
void PPCInterpreter_DCBZL(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// dcbi: handled as a no-op, only the instruction pointer advances.
void PPCInterpreter_DCBI(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// icbi: computes the effective address (rA|0) + rB and forwards it to
// coreinit::codeGenHandleICBI so the affected code range can be invalidated.
void PPCInterpreter_ICBI(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	const sint32 rA = (Opcode >> 16) & 0x1f;
	const sint32 rB = (Opcode >> 11) & 0x1f;

	uint32 ea = hCPU->gpr[rB];
	if (rA != 0)
		ea += hCPU->gpr[rA]; // rA == 0 means "no base register"

	// invalidate range
	coreinit::codeGenHandleICBI(ea);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// eieio: has no effect in the interpreter, only the instruction pointer advances.
void PPCInterpreter_EIEIO(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// sc: no system call is performed; the address of the instruction is logged
// and execution continues with the next instruction.
void PPCInterpreter_SC(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	forceLogDebug_printf("SC executed at 0x%08x", hCPU->instructionPointer);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// sync: handled as a no-op, only the instruction pointer advances.
void PPCInterpreter_SYNC(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// isync: handled as a no-op, only the instruction pointer advances.
void PPCInterpreter_ISYNC(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// rfi: return from interrupt.
// The MSR bits covered by mask 0x87C0FF73 are replaced with the corresponding bits of
// SRR1, bit 0x00040000 (same value as MSR_POW) is cleared, MSR_RI is set, and execution
// resumes at the address held in SRR0.
void PPCInterpreter_RFI(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	forceLogDebug_printf("RFI");
	auto msrValue = hCPU->sprExtended.msr;
	msrValue &= ~(0x87C0FF73 | 0x00040000);
	msrValue |= hCPU->sprExtended.srr1 & 0x87c0ff73;
	msrValue |= MSR_RI;
	hCPU->sprExtended.msr = msrValue;
	hCPU->instructionPointer = (unsigned int)(hCPU->sprExtended.srr0);
}
|
73
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.hpp
Normal file
73
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.hpp
Normal file
|
@ -0,0 +1,73 @@
|
|||
|
||||
// mfspr: reads a special purpose register into GPR rD.
// The 10-bit SPR number is stored as two swapped 5-bit halves in the opcode.
static void PPCInterpreter_MFSPR(PPCInterpreter_t* hCPU, uint32 opcode)
{
	const uint32 rD = (opcode >> 21) & 0x1f;
	const uint32 sprLow = (opcode >> 16) & 0x1f;
	const uint32 sprHigh = (opcode >> 11) & 0x1f;
	const uint32 spr = sprLow | (sprHigh << 5);
	hCPU->gpr[rD] = PPCSpr_get(hCPU, spr);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// mtspr: writes the GPR selected by opcode bits 21..25 to a special purpose register.
// The 10-bit SPR number is stored as two swapped 5-bit halves in the opcode.
static void PPCInterpreter_MTSPR(PPCInterpreter_t* hCPU, uint32 opcode)
{
	const uint32 rS = (opcode >> 21) & 0x1f;
	const uint32 sprLow = (opcode >> 16) & 0x1f;
	const uint32 sprHigh = (opcode >> 11) & 0x1f;
	const uint32 spr = sprLow | (sprHigh << 5);
	PPCSpr_set(hCPU, spr, hCPU->gpr[rS]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// mfsr: reads segment register SR (0..15) into GPR rD via getSR().
static void PPCInterpreter_MFSR(PPCInterpreter_t* hCPU, uint32 opcode)
{
	const uint32 rD = (opcode >> 21) & 0x1f;
	const uint32 segmentIndex = ((opcode >> 16) & 0x1f) & 0xF;
	hCPU->gpr[rD] = getSR(hCPU, segmentIndex);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// mtsr: writes GPR rS to segment register SR (0..15) via setSR().
static void PPCInterpreter_MTSR(PPCInterpreter_t* hCPU, uint32 opcode)
{
	const uint32 rS = (opcode >> 21) & 0x1f;
	const uint32 segmentIndex = ((opcode >> 16) & 0x1f) & 0xF;
	setSR(hCPU, segmentIndex, hCPU->gpr[rS]);
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// mftb: reads one half of the 64-bit time base into GPR rD.
// SPR 268 (TBL) yields the lower 32 bits, SPR 269 (TBU) the upper 32 bits.
// Any other SPR number triggers assert_dbg() and leaves rD unchanged.
static void PPCInterpreter_MFTB(PPCInterpreter_t* hCPU, uint32 opcode)
{
	// decode destination register and SPR id (two swapped 5-bit halves)
	const uint32 rD = (opcode >> 21) & 0x1f;
	const uint32 spr = ((opcode >> 16) & 0x1f) | (((opcode >> 11) & 0x1f) << 5);
	// get core cycle counter
	const uint64 coreTime = ppcItpCtrl::getTB(hCPU);

	if (spr == 268) // TBL
	{
		hCPU->gpr[rD] = (uint32)(coreTime & 0xFFFFFFFF);
	}
	else if (spr == 269) // TBU
	{
		hCPU->gpr[rD] = (uint32)((coreTime >> 32) & 0xFFFFFFFF);
	}
	else
	{
		assert_dbg();
	}
	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// tw: the trap is handed to the debugger via debugger_enterTW().
// Only the form with TO == 0 is expected (checked with a debug assert). The register
// operands are not evaluated and the instruction pointer is not advanced here.
static void PPCInterpreter_TW(PPCInterpreter_t* hCPU, uint32 opcode)
{
	const sint32 to = (opcode >> 21) & 0x1f;
	cemu_assert_debug(to == 0);
	debugger_enterTW(hCPU);
}
|
506
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterPS.cpp
Normal file
506
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterPS.cpp
Normal file
|
@ -0,0 +1,506 @@
|
|||
#include "PPCInterpreterInternal.h"
|
||||
|
||||
// Gekko paired single math
|
||||
|
||||
// ps_add: frD = frA + frB, computed separately for both paired-single slots.
// Each sum is rounded to float before it is stored.
void PPCInterpreter_PS_ADD(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;

	auto& dst = hCPU->fpr[frD];
	const auto& srcA = hCPU->fpr[frA];
	const auto& srcB = hCPU->fpr[frB];

	dst.fp0 = (float)(srcA.fp0 + srcB.fp0);
	dst.fp1 = (float)(srcA.fp1 + srcB.fp1);

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_sub: frD = frA - frB, computed separately for both paired-single slots.
// Each difference is rounded to float before it is stored.
void PPCInterpreter_PS_SUB(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;

	auto& dst = hCPU->fpr[frD];
	const auto& srcA = hCPU->fpr[frA];
	const auto& srcB = hCPU->fpr[frB];

	dst.fp0 = (float)(srcA.fp0 - srcB.fp0);
	dst.fp1 = (float)(srcA.fp1 - srcB.fp1);

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_mul: frD = frA * frC, computed separately for both paired-single slots.
// The frC operand is passed through roundTo25BitAccuracy() first; the product is
// rounded to float and then passed through flushDenormalToZero().
void PPCInterpreter_PS_MUL(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frC = (Opcode >> 6) & 0x1F;

	auto& dst = hCPU->fpr[frD];
	const auto& srcA = hCPU->fpr[frA];
	const auto& srcC = hCPU->fpr[frC];

	dst.fp0 = flushDenormalToZero((float)(srcA.fp0 * roundTo25BitAccuracy(srcC.fp0)));
	dst.fp1 = flushDenormalToZero((float)(srcA.fp1 * roundTo25BitAccuracy(srcC.fp1)));

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_div: frD = frA / frB, computed separately for both paired-single slots.
// Each quotient is rounded to float before it is stored.
void PPCInterpreter_PS_DIV(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;

	auto& dst = hCPU->fpr[frD];
	const auto& srcA = hCPU->fpr[frA];
	const auto& srcB = hCPU->fpr[frB];

	dst.fp0 = (float)(srcA.fp0 / srcB.fp0);
	dst.fp1 = (float)(srcA.fp1 / srcB.fp1);

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
|
||||
// ps_madd: frD = frA * frC + frB, computed separately for both paired-single slots.
// The frC operand goes through roundTo25BitAccuracy(); the product is rounded to float
// before frB is added, the sum is rounded to float again and finally passed through
// flushDenormalToZero(). Both results are computed before either slot is written.
void PPCInterpreter_PS_MADD(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;
	const sint32 frC = (Opcode >> 6) & 0x1F;

	const auto& srcA = hCPU->fpr[frA];
	const auto& srcB = hCPU->fpr[frB];
	const auto& srcC = hCPU->fpr[frC];

	const float result0 = (float)((float)(srcA.fp0 * roundTo25BitAccuracy(srcC.fp0)) + srcB.fp0);
	const float result1 = (float)((float)(srcA.fp1 * roundTo25BitAccuracy(srcC.fp1)) + srcB.fp1);

	auto& dst = hCPU->fpr[frD];
	dst.fp0 = flushDenormalToZero(result0);
	dst.fp1 = flushDenormalToZero(result1);

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_nmadd: frD = -(frA * frC + frB), computed separately for both paired-single slots.
// The frC operand goes through roundTo25BitAccuracy(); the negated result is rounded to
// float. Both results are computed before either slot is written.
void PPCInterpreter_PS_NMADD(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;
	const sint32 frC = (Opcode >> 6) & 0x1F;

	const auto& srcA = hCPU->fpr[frA];
	const auto& srcB = hCPU->fpr[frB];
	const auto& srcC = hCPU->fpr[frC];

	const float result0 = (float)-(srcA.fp0 * roundTo25BitAccuracy(srcC.fp0) + srcB.fp0);
	const float result1 = (float)-(srcA.fp1 * roundTo25BitAccuracy(srcC.fp1) + srcB.fp1);

	auto& dst = hCPU->fpr[frD];
	dst.fp0 = result0;
	dst.fp1 = result1;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_msub: for each half, frD = (frA * frC) - frB, cast to float.
// The frC operand goes through roundTo25BitAccuracy before the multiply.
void PPCInterpreter_PS_MSUB(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;
	const sint32 frC = (Opcode >> 6) & 0x1F;

	// evaluate both halves before writing, since frD may alias a source register
	const float diff0 = (float)(hCPU->fpr[frA].fp0 * roundTo25BitAccuracy(hCPU->fpr[frC].fp0) - hCPU->fpr[frB].fp0);
	const float diff1 = (float)(hCPU->fpr[frA].fp1 * roundTo25BitAccuracy(hCPU->fpr[frC].fp1) - hCPU->fpr[frB].fp1);

	hCPU->fpr[frD].fp0 = diff0;
	hCPU->fpr[frD].fp1 = diff1;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_nmsub: for each half, frD = -((frA * frC) - frB), cast to float.
// The frC operand goes through roundTo25BitAccuracy before the multiply.
void PPCInterpreter_PS_NMSUB(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;
	const sint32 frC = (Opcode >> 6) & 0x1F;

	// evaluate both halves before writing, since frD may alias a source register
	const float negDiff0 = (float)-(hCPU->fpr[frA].fp0 * roundTo25BitAccuracy(hCPU->fpr[frC].fp0) - hCPU->fpr[frB].fp0);
	const float negDiff1 = (float)-(hCPU->fpr[frA].fp1 * roundTo25BitAccuracy(hCPU->fpr[frC].fp1) - hCPU->fpr[frB].fp1);

	hCPU->fpr[frD].fp0 = negDiff0;
	hCPU->fpr[frD].fp1 = negDiff1;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_madds0: multiply-add where both halves use the FIRST half of frC as the
// multiplier: frD.ps0 = frA.ps0 * frC.ps0 + frB.ps0, frD.ps1 = frA.ps1 * frC.ps0 + frB.ps1.
void PPCInterpreter_PS_MADDS0(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;
	const sint32 frC = (Opcode >> 6) & 0x1F;

	// shared multiplier, taken from frC.fp0 via roundTo25BitAccuracy
	const double multiplier = roundTo25BitAccuracy(hCPU->fpr[frC].fp0);
	const float result0 = (float)(hCPU->fpr[frA].fp0 * multiplier + hCPU->fpr[frB].fp0);
	const float result1 = (float)(hCPU->fpr[frA].fp1 * multiplier + hCPU->fpr[frB].fp1);

	hCPU->fpr[frD].fp0 = result0;
	hCPU->fpr[frD].fp1 = result1;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_madds1: multiply-add where both halves use the SECOND half of frC as the
// multiplier: frD.ps0 = frA.ps0 * frC.ps1 + frB.ps0, frD.ps1 = frA.ps1 * frC.ps1 + frB.ps1.
void PPCInterpreter_PS_MADDS1(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;
	const sint32 frC = (Opcode >> 6) & 0x1F;

	// shared multiplier, taken from frC.fp1 via roundTo25BitAccuracy
	const double multiplier = roundTo25BitAccuracy(hCPU->fpr[frC].fp1);
	const float result0 = (float)(hCPU->fpr[frA].fp0 * multiplier + hCPU->fpr[frB].fp0);
	const float result1 = (float)(hCPU->fpr[frA].fp1 * multiplier + hCPU->fpr[frB].fp1);

	hCPU->fpr[frD].fp0 = result0;
	hCPU->fpr[frD].fp1 = result1;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_sel: per-half select. Where the frA half compares >= -0.0f the frC half
// is copied to frD, otherwise the frB half is copied.
void PPCInterpreter_PS_SEL(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;
	const sint32 frC = (Opcode >> 6) & 0x1F;

	// first half
	hCPU->fpr[frD].fp0 = (hCPU->fpr[frA].fp0 >= -0.0f) ? hCPU->fpr[frC].fp0 : hCPU->fpr[frB].fp0;
	// second half
	hCPU->fpr[frD].fp1 = (hCPU->fpr[frA].fp1 >= -0.0f) ? hCPU->fpr[frC].fp1 : hCPU->fpr[frB].fp1;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_sum0: frD.ps0 = frA.ps0 + frB.ps1 (cast to float), frD.ps1 = frC.ps1 (cast to float).
void PPCInterpreter_PS_SUM0(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;
	const sint32 frC = (Opcode >> 6) & 0x1F;

	// read every source before frD is touched
	const float crossSum = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1);
	const float passThrough = (float)hCPU->fpr[frC].fp1;

	hCPU->fpr[frD].fp0 = crossSum;
	hCPU->fpr[frD].fp1 = passThrough;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_sum1: frD.ps0 = frC.ps0 (cast to float), frD.ps1 = frA.ps0 + frB.ps1 (cast to float).
void PPCInterpreter_PS_SUM1(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;
	const sint32 frC = (Opcode >> 6) & 0x1F;

	// read every source before frD is touched
	const float passThrough = (float)hCPU->fpr[frC].fp0;
	const float crossSum = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1);

	hCPU->fpr[frD].fp0 = passThrough;
	hCPU->fpr[frD].fp1 = crossSum;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_muls0: both halves of frA are multiplied by the FIRST half of frC
// (passed through roundTo25BitAccuracy); products are cast to float.
void PPCInterpreter_PS_MULS0(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frC = (Opcode >> 6) & 0x1F;

	const double multiplier = roundTo25BitAccuracy(hCPU->fpr[frC].fp0);
	const float product0 = (float)(hCPU->fpr[frA].fp0 * multiplier);
	const float product1 = (float)(hCPU->fpr[frA].fp1 * multiplier);

	hCPU->fpr[frD].fp0 = product0;
	hCPU->fpr[frD].fp1 = product1;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_muls1: both halves of frA are multiplied by the SECOND half of frC
// (passed through roundTo25BitAccuracy); products are cast to float.
void PPCInterpreter_PS_MULS1(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frC = (Opcode >> 6) & 0x1F;

	const double multiplier = roundTo25BitAccuracy(hCPU->fpr[frC].fp1);
	const float product0 = (float)(hCPU->fpr[frA].fp0 * multiplier);
	const float product1 = (float)(hCPU->fpr[frA].fp1 * multiplier);

	hCPU->fpr[frD].fp0 = product0;
	hCPU->fpr[frD].fp1 = product1;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_mr: copies both halves of frB into frD unchanged.
void PPCInterpreter_PS_MR(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;

	const auto& source = hCPU->fpr[frB];
	auto& target = hCPU->fpr[frD];
	target.fp0 = source.fp0;
	target.fp1 = source.fp1;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_neg: writes the arithmetic negation of each half of frB into frD.
void PPCInterpreter_PS_NEG(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;

	const auto& source = hCPU->fpr[frB];
	auto& target = hCPU->fpr[frD];
	target.fp0 = -source.fp0;
	target.fp1 = -source.fp1;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_abs: copies frB to frD with bit 63 of each half's integer view cleared.
void PPCInterpreter_PS_ABS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;

	// operate on the raw bit pattern so only the top bit changes
	const auto topBit = (1ULL << 63);
	hCPU->fpr[frD].fp0int = hCPU->fpr[frB].fp0int & ~topBit;
	hCPU->fpr[frD].fp1int = hCPU->fpr[frB].fp1int & ~topBit;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_nabs: copies frB to frD with bit 63 of each half's integer view set.
void PPCInterpreter_PS_NABS(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;

	// operate on the raw bit pattern so only the top bit changes
	const auto topBit = (1ULL << 63);
	hCPU->fpr[frD].fp0int = hCPU->fpr[frB].fp0int | topBit;
	hCPU->fpr[frD].fp1int = hCPU->fpr[frB].fp1int | topBit;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_rsqrte: applies frsqrte_espresso to each half of frB and stores the
// results, cast to float, in frD.
void PPCInterpreter_PS_RSQRTE(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;

	// first half
	const float estimate0 = (float)frsqrte_espresso(hCPU->fpr[frB].fp0);
	hCPU->fpr[frD].fp0 = estimate0;
	// second half
	const float estimate1 = (float)frsqrte_espresso(hCPU->fpr[frB].fp1);
	hCPU->fpr[frD].fp1 = estimate1;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_merge00: frD.ps0 = frA.ps0, frD.ps1 = frB.ps0.
void PPCInterpreter_PS_MERGE00(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;

	// latch both sources first; frD may be the same register as frA or frB
	const double newHalf0 = hCPU->fpr[frA].fp0;
	const double newHalf1 = hCPU->fpr[frB].fp0;

	hCPU->fpr[frD].fp0 = newHalf0;
	hCPU->fpr[frD].fp1 = newHalf1;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_merge01: frD.ps0 = frA.ps0, frD.ps1 = frB.ps1.
void PPCInterpreter_PS_MERGE01(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;

	// latch both sources first; frD may be the same register as frA or frB
	const double newHalf0 = hCPU->fpr[frA].fp0;
	const double newHalf1 = hCPU->fpr[frB].fp1;

	hCPU->fpr[frD].fp0 = newHalf0;
	hCPU->fpr[frD].fp1 = newHalf1;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_merge10: frD.ps0 = frA.ps1, frD.ps1 = frB.ps0.
void PPCInterpreter_PS_MERGE10(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;

	// latch both sources first; frD may be the same register as frA or frB
	const double newHalf0 = hCPU->fpr[frA].fp1;
	const double newHalf1 = hCPU->fpr[frB].fp0;

	hCPU->fpr[frD].fp0 = newHalf0;
	hCPU->fpr[frD].fp1 = newHalf1;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_merge11: frD.ps0 = frA.ps1, frD.ps1 = frB.ps1.
void PPCInterpreter_PS_MERGE11(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;

	// latch both sources first; frD may be the same register as frA or frB
	const double newHalf0 = hCPU->fpr[frA].fp1;
	const double newHalf1 = hCPU->fpr[frB].fp1;

	hCPU->fpr[frD].fp0 = newHalf0;
	hCPU->fpr[frD].fp1 = newHalf1;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_res: applies fres_espresso to each half of frB and stores the results,
// cast to float, in frD.
void PPCInterpreter_PS_RES(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frD = (Opcode >> 21) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;

	// first half
	const float estimate0 = (float)fres_espresso(hCPU->fpr[frB].fp0);
	hCPU->fpr[frD].fp0 = estimate0;
	// second half
	const float estimate1 = (float)fres_espresso(hCPU->fpr[frB].fp1);
	hCPU->fpr[frD].fp1 = estimate1;

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// PS compare
|
||||
|
||||
// ps_cmpo0: compares the first halves of frA and frB. Exactly one bit of CR
// field crfD is set (SO if either operand is NaN, otherwise LT / GT / EQ) and
// a matching 4-bit code is written into bits 12..15 of fpscr.
void PPCInterpreter_PS_CMPO0(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frB = (Opcode >> 11) & 0x1F;
	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 crfD = (Opcode >> 23) & 0x7;

	double a = hCPU->fpr[frA].fp0;
	double b = hCPU->fpr[frB].fp0;

	// start with the whole destination CR field cleared
	for (sint32 bit = 0; bit < 4; bit++)
		ppc_setCRBit(hCPU, crfD*4+bit, 0);

	// classify the comparison: which CR bit to raise and which code goes to fpscr
	uint32 cc;
	sint32 resultBit;
	if (IS_NAN(*(uint64*)&a) || IS_NAN(*(uint64*)&b))
	{
		cc = 1;
		resultBit = CR_BIT_SO;
	}
	else if (a < b)
	{
		cc = 8;
		resultBit = CR_BIT_LT;
	}
	else if (a > b)
	{
		cc = 4;
		resultBit = CR_BIT_GT;
	}
	else
	{
		cc = 2;
		resultBit = CR_BIT_EQ;
	}
	ppc_setCRBit(hCPU, crfD*4+resultBit, 1);

	hCPU->fpscr = (hCPU->fpscr & 0xffff0fff) | (cc << 12);

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_cmpu0: compares the first halves of frA and frB through fcmpu_espresso.
// The CR argument is the crfD field already scaled by 4 (the mask keeps it
// in its shifted position).
void PPCInterpreter_PS_CMPU0(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();

	const sint32 frA = (Opcode >> 16) & 0x1F;
	const sint32 frB = (Opcode >> 11) & 0x1F;
	const sint32 crBitBase = (Opcode >> 21) & (0x7<<2);

	fcmpu_espresso(hCPU, crBitBase, hCPU->fpr[frA].fp0, hCPU->fpr[frB].fp0);

	PPCInterpreter_nextInstruction(hCPU);
}
|
||||
|
||||
// ps_cmpu1: compares the second halves of frA and frB through fcmpu_espresso.
// The CR argument is the crfD field already scaled by 4 (the mask keeps it
// in its shifted position).
void PPCInterpreter_PS_CMPU1(PPCInterpreter_t* hCPU, uint32 Opcode)
{
	FPUCheckAvailable();
	sint32 crfD, frA, frB;
	frB = (Opcode >> 11) & 0x1F;
	frA = (Opcode >> 16) & 0x1F;
	crfD = (Opcode >> 21) & (0x7 << 2);
	// the operands are passed straight to the compare helper; the previous
	// local copies 'a' and 'b' were never read and have been dropped
	fcmpu_espresso(hCPU, crfD, hCPU->fpr[frA].fp1, hCPU->fpr[frB].fp1);
	PPCInterpreter_nextInstruction(hCPU);
}
|
875
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterSPR.hpp
Normal file
875
src/Cafe/HW/Espresso/Interpreter/PPCInterpreterSPR.hpp
Normal file
|
@ -0,0 +1,875 @@
|
|||
// SPR numbers used by the supervisor-mode SPR accessors in this file.

// time base (write access)
#define SPR_TBL_WRITE	(284)
#define SPR_TBU_WRITE	(285)

// data BAT registers, upper words
#define SPR_DBATU_0		(536)
#define SPR_DBATU_1		(538)
#define SPR_DBATU_2		(540)
#define SPR_DBATU_3		(542)
#define SPR_DBATU_4		(568)
#define SPR_DBATU_5		(570)
#define SPR_DBATU_6		(572)
#define SPR_DBATU_7		(574)

// data BAT registers, lower words
#define SPR_DBATL_0		(537)
#define SPR_DBATL_1		(539)
#define SPR_DBATL_2		(541)
#define SPR_DBATL_3		(543)
#define SPR_DBATL_4		(569)
#define SPR_DBATL_5		(571)
#define SPR_DBATL_6		(573)
#define SPR_DBATL_7		(575)

// instruction BAT registers, upper words
#define SPR_IBATU_0		(528)
#define SPR_IBATU_1		(530)
#define SPR_IBATU_2		(532)
#define SPR_IBATU_3		(534)
#define SPR_IBATU_4		(560)
#define SPR_IBATU_5		(562)
#define SPR_IBATU_6		(564)
#define SPR_IBATU_7		(566)

// instruction BAT registers, lower words
#define SPR_IBATL_0		(529)
#define SPR_IBATL_1		(531)
#define SPR_IBATL_2		(533)
#define SPR_IBATL_3		(535)
#define SPR_IBATL_4		(561)
#define SPR_IBATL_5		(563)
#define SPR_IBATL_6		(565)
#define SPR_IBATL_7		(567)

#define SPR_DSISR		(18)
#define SPR_DAR			(19)

#define SPR_SPRG0		(272)
#define SPR_SPRG1		(273)
#define SPR_SPRG2		(274)
#define SPR_SPRG3		(275)

// SPR_HID0 / SPR_HID2 are intentionally left commented out here
//#define SPR_HID0 (1008)
//#define SPR_HID2 (920)
#define SPR_HID4		(1011)
#define SPR_HID5		(944)

#define SPR_L2CR		(1017) // L2 cache control

#define SPR_CAR			(948) // global
#define SPR_BCR			(949) // global
|
||||
|
||||
// Returns the fixed value reported for reads of the PVR register.
static uint32 getPVR(PPCInterpreter_t* hCPU)
{
	const uint32 reportedVersion = 0x70010101; // guessed
	return reportedVersion;
}
|
||||
|
||||
// Reads the stored FPECR value of this core.
static uint32 getFPECR(PPCInterpreter_t* hCPU)
{
	const uint32 value = hCPU->sprExtended.fpecr;
	return value;
}
|
||||
|
||||
// Stores a new FPECR value for this core; no other state is touched.
static void setFPECR(PPCInterpreter_t* hCPU, uint32 newValue)
{
	auto& extended = hCPU->sprExtended;
	extended.fpecr = newValue;
}
|
||||
|
||||
// Handles a write to DEC. The value is only traced; nothing is stored.
static void setDEC(PPCInterpreter_t* hCPU, uint32 newValue)
{
	// trace only - the decrementer value is not kept anywhere by this function
	debug_printf("Set DEC to 0x%08x\n", newValue);
}
|
||||
|
||||
// Reads SPRG<sprgIndex>. The index is used as-is, without range checking.
static uint32 getSPRG(PPCInterpreter_t* hCPU, uint32 sprgIndex)
{
	const uint32 value = hCPU->sprExtended.sprg[sprgIndex];
	return value;
}
|
||||
|
||||
// Writes SPRG<sprgIndex>. The index is used as-is, without range checking.
static void setSPRG(PPCInterpreter_t* hCPU, uint32 sprgIndex, uint32 newValue)
{
	auto& extended = hCPU->sprExtended;
	extended.sprg[sprgIndex] = newValue;
}
|
||||
|
||||
// Reads the stored DAR value of this core.
static uint32 getDAR(PPCInterpreter_t* hCPU)
{
	const uint32 value = hCPU->sprExtended.dar;
	return value;
}
|
||||
|
||||
// Reads the stored DSISR value of this core.
static uint32 getDSISR(PPCInterpreter_t* hCPU)
{
	const uint32 value = hCPU->sprExtended.dsisr;
	return value;
}
|
||||
|
||||
// Read of HID0. Not implemented yet: always yields zero.
static uint32 getHID0(PPCInterpreter_t* hCPU)
{
	const uint32 placeholder = 0; // todo
	return placeholder;
}
|
||||
|
||||
// Write to HID0. Not implemented yet: the value is traced and discarded.
static void setHID0(PPCInterpreter_t* hCPU, uint32 newValue)
{
	debug_printf("Set HID0 to 0x%08x\n", newValue); // todo - store / act on the value
}
|
||||
|
||||
// Read of HID1. Traces the current instruction pointer and yields zero.
static uint32 getHID1(PPCInterpreter_t* hCPU)
{
	const uint32 placeholder = 0; // todo
	debug_printf("Get HID1 IP 0x%08x\n", hCPU->instructionPointer);
	return placeholder;
}
|
||||
|
||||
// Read of HID2. Traces the current instruction pointer and yields zero.
static uint32 getHID2(PPCInterpreter_t* hCPU)
{
	const uint32 placeholder = 0; // todo
	debug_printf("Get HID2 IP 0x%08x\n", hCPU->instructionPointer);
	return placeholder;
}
|
||||
|
||||
// Write to HID2. Not implemented yet: the value is traced and discarded.
static void setHID2(PPCInterpreter_t* hCPU, uint32 newValue)
{
	debug_printf("Set HID2 to 0x%08x\n", newValue); // todo - store / act on the value
}
|
||||
|
||||
// Read of HID4. Traces the current instruction pointer and yields zero.
static uint32 getHID4(PPCInterpreter_t* hCPU)
{
	const uint32 placeholder = 0; // todo
	debug_printf("Get HID4 IP 0x%08x\n", hCPU->instructionPointer);
	return placeholder;
}
|
||||
|
||||
// Write to HID4. Not implemented yet: the value is traced and discarded.
static void setHID4(PPCInterpreter_t* hCPU, uint32 newValue)
{
	debug_printf("Set HID4 to 0x%08x\n", newValue); // todo - store / act on the value
}
|
||||
|
||||
// Read of HID5 (Wii-U only register). Traces the current instruction pointer
// and yields zero.
static uint32 getHID5(PPCInterpreter_t* hCPU)
{
	const uint32 placeholder = 0; // todo
	debug_printf("Get HID5 IP 0x%08x\n", hCPU->instructionPointer);
	return placeholder;
}
|
||||
|
||||
// Write to HID5 (Wii-U only register). Not implemented yet: the value is
// traced and discarded.
static void setHID5(PPCInterpreter_t* hCPU, uint32 newValue)
{
	debug_printf("Set HID5 to 0x%08x\n", newValue); // todo - store / act on the value
}
|
||||
|
||||
// Read of SCR (WiiU mode only?). Not implemented yet: always yields zero,
// independent of what setSCR stored.
static uint32 getSCR(PPCInterpreter_t* hCPU)
{
	const uint32 placeholder = 0; // todo
	return placeholder;
}
|
||||
|
||||
// Write to the global SCR register.
// Bit 0x80000000 is sticky: once set in the stored value it is OR'ed into every
// later write. On the 0 -> 1 transition of that bit the first 0x4000 bytes at
// memory_base are zeroed and (Windows only) the lowest page is made read-only.
static void setSCR(PPCInterpreter_t* hCPU, uint32 newValue)
{
	const uint32 storedSCR = hCPU->global->sprGlobal.scr;
	const bool msbWasSet = (storedSCR & 0x80000000) != 0;

	newValue |= (storedSCR & 0x80000000); // this bit always sticks?
	const bool msbIsSet = (newValue & 0x80000000) != 0;

	if (!msbWasSet && msbIsSet)
	{
		// this bit is used to disable bootrom mapping, but we use it to know when to copy the decrypted ancast image into kernel memory
		debug_printf("SCR MSB set. Unmap bootrom?\n");

		//memcpy(memory_base + 0xFFE00000, memory_base + 0x08000000, 0x180000);
		// hack - clear low memory (where bootrom was mapped/loaded)
		memset(memory_base, 0, 0x4000);
		//// todo - normally IOSU sets up some stuff here (probably)

		// for debugging purposes make lowest page read-only
#ifdef _WIN32
		DWORD previousProtection;
		VirtualProtect(memory_base, 0x1000, PAGE_READONLY, &previousProtection);
#endif
	}

	debug_printf("Set SCR to 0x%08x\n", newValue);
	hCPU->global->sprGlobal.scr = newValue;
}
|
||||
|
||||
// SCR probably has bits to control following:
|
||||
// disable bootrom (bit 0x80000000)
|
||||
// disable PPC OTP
|
||||
// bits to start the extra cores
|
||||
|
||||
// Read of CAR (global register, WiiU mode only). Not implemented yet: always
// yields zero.
static uint32 getCAR(PPCInterpreter_t* hCPU)
{
	const uint32 placeholder = 0; // todo
	return placeholder;
}
|
||||
|
||||
// Write to CAR (global register, WiiU mode only). The value is traced and
// discarded.
static void setCAR(PPCInterpreter_t* hCPU, uint32 newValue)
{
	debug_printf("Set CAR to 0x%08x\n", newValue); // trace only, nothing is stored
}
|
||||
|
||||
// Read of BCR (global register, WiiU mode only). Not implemented yet: always
// yields zero.
static uint32 getBCR(PPCInterpreter_t* hCPU)
{
	const uint32 placeholder = 0; // todo
	return placeholder;
}
|
||||
|
||||
// Write to BCR (global register, WiiU mode only). The value is traced and
// discarded.
static void setBCR(PPCInterpreter_t* hCPU, uint32 newValue)
{
	debug_printf("Set BCR to 0x%08x\n", newValue); // trace only, nothing is stored
}
|
||||
|
||||
|
||||
// Read of L2CR. Not implemented yet: always yields zero.
static uint32 getL2CR(PPCInterpreter_t* hCPU)
{
	const uint32 placeholder = 0; // todo
	return placeholder;
}
|
||||
|
||||
// Write to L2CR. Not implemented yet: the write is silently ignored.
static void setL2CR(PPCInterpreter_t* hCPU, uint32 newValue)
{
	// todo - both arguments are currently unused
	(void)hCPU;
	(void)newValue;
}
|
||||
|
||||
// Stores a new SRR0 value for this core.
static void setSRR0(PPCInterpreter_t* hCPU, uint32 newValue)
{
	auto& extended = hCPU->sprExtended;
	extended.srr0 = newValue;
}
|
||||
|
||||
// Stores a new SRR1 value for this core.
static void setSRR1(PPCInterpreter_t* hCPU, uint32 newValue)
{
	auto& extended = hCPU->sprExtended;
	extended.srr1 = newValue;
}
|
||||
|
||||
// Stores a new DMAU value. No transfer is started here; setDMAL reads this
// value back when a transfer is triggered.
static void setDMAU(PPCInterpreter_t* hCPU, uint32 newValue)
{
	auto& extended = hCPU->sprExtended;
	extended.dmaU = newValue;
}
|
||||
|
||||
// Write to DMAL. Stores the value and, if bit 0x2 is set, immediately performs
// a locked-cache DMA transfer described by DMAU + DMAL:
//   - length (in 32-byte units): low 5 bits of DMAU form the upper part,
//     bits 2..3 of DMAL the lower part; an encoded length of 0 means 128 units
//   - memory address: DMAU with the low 5 bits masked off
//   - cache address:  DMAL with the low 5 bits masked off
//   - direction:      bit 4 of DMAL (set = memory -> cache, clear = cache -> memory)
// After the copy, bit 0x2 is cleared again in the stored DMAL value.
static void setDMAL(PPCInterpreter_t* hCPU, uint32 newValue)
{
	hCPU->sprExtended.dmaL = newValue;
	// LC DMA
	if(newValue &0x2 )
	{
		uint32 transferLength = (((hCPU->sprExtended.dmaU>>0)&0x1F)<<2)|((newValue>>2)&3);
		uint32 memAddr = (hCPU->sprExtended.dmaU)&0xFFFFFFE0;
		uint32 cacheAddr = (newValue)&0xFFFFFFE0;
		if( transferLength == 0 )
			transferLength = 128;
		transferLength *= 32; // convert 32-byte units to bytes
		bool isLoad = ((newValue>>4)&1)!=0;
		if( (cacheAddr>>28) != 0xE )
		{
			// the transfer still proceeds, using offset 0 inside the 0xE0000000 window
			debug_printf("LCTransfer: Not a cache address\n");
			cacheAddr = 0;
		}
		else
		{
			// keep only the offset relative to 0xE0000000
			cacheAddr -= 0xE0000000;
		}
		if( !isLoad )
		{
			// locked cache -> memory
			// trace prints source first, then destination, matching the "L2->MEM" label
			debug_printf("L2->MEM %08x -> %08x size: 0x%x\n", 0xE0000000 + cacheAddr, memAddr, transferLength);
			memcpy(memory_getPointerFromVirtualOffset(memAddr), memory_base+0xE0000000+cacheAddr, transferLength);
		}
		else
		{
			// memory -> locked cache
			// trace prints source first, then destination, matching the "MEM->L2" label
			debug_printf("MEM->L2 %08x -> %08x size: 0x%x\n", memAddr, 0xE0000000 + cacheAddr, transferLength);
			memcpy(memory_base + 0xE0000000 + cacheAddr, memory_getPointerFromVirtualOffset(memAddr), transferLength);
		}
		// clear the trigger bit in the stored value
		newValue &= ~2;
		hCPU->sprExtended.dmaL = newValue;
	}
}
|
||||
|
||||
// Traces and stores the lower word of data BAT <index>. The index is not
// range-checked.
static void setDBATL(PPCInterpreter_t* hCPU, uint32 index, uint32 newValue)
{
	auto& extended = hCPU->sprExtended;
	debug_printf("Set DBATL%d to 0x%08x\n", index, newValue);
	extended.dbatL[index] = newValue;
}
|
||||
|
||||
// Traces and stores the upper word of data BAT <index>. The index is not
// range-checked.
static void setDBATU(PPCInterpreter_t* hCPU, uint32 index, uint32 newValue)
{
	auto& extended = hCPU->sprExtended;
	debug_printf("Set DBATU%d to 0x%08x\n", index, newValue);
	extended.dbatU[index] = newValue;
}
|
||||
|
||||
// Traces and stores the lower word of instruction BAT <index>. The index is
// not range-checked.
static void setIBATL(PPCInterpreter_t* hCPU, uint32 index, uint32 newValue)
{
	auto& extended = hCPU->sprExtended;
	debug_printf("Set IBATL%d to 0x%08x\n", index, newValue);
	extended.ibatL[index] = newValue;
}
|
||||
|
||||
// Traces and stores the upper word of instruction BAT <index>. The index is
// not range-checked.
static void setIBATU(PPCInterpreter_t* hCPU, uint32 index, uint32 newValue)
{
	auto& extended = hCPU->sprExtended;
	debug_printf("Set IBATU%d to 0x%08x\n", index, newValue);
	extended.ibatU[index] = newValue;
}
|
||||
|
||||
// Reads the lower word of data BAT <index> (no range check).
static uint32 getDBATL(PPCInterpreter_t* hCPU, uint32 index)
{
	const uint32 value = hCPU->sprExtended.dbatL[index];
	return value;
}
|
||||
|
||||
// Reads the upper word of data BAT <index> (no range check).
static uint32 getDBATU(PPCInterpreter_t* hCPU, uint32 index)
{
	const uint32 value = hCPU->sprExtended.dbatU[index];
	return value;
}
|
||||
|
||||
// Reads the lower word of instruction BAT <index> (no range check).
static uint32 getIBATL(PPCInterpreter_t* hCPU, uint32 index)
{
	const uint32 value = hCPU->sprExtended.ibatL[index];
	return value;
}
|
||||
|
||||
// Reads the upper word of instruction BAT <index> (no range check).
static uint32 getIBATU(PPCInterpreter_t* hCPU, uint32 index)
{
	const uint32 value = hCPU->sprExtended.ibatU[index];
	return value;
}
|
||||
|
||||
// Traces (including current IP and LR) and stores segment register <index>.
// The index is not range-checked.
static void setSR(PPCInterpreter_t* hCPU, uint32 index, uint32 newValue)
{
	auto& extended = hCPU->sprExtended;
	debug_printf("Set SR%d to 0x%08x IP %08x LR %08x\n", index, newValue, hCPU->instructionPointer, hCPU->spr.LR);
	extended.sr[index] = newValue;
}
|
||||
|
||||
// Reads segment register <index> (no range check).
static uint32 getSR(PPCInterpreter_t* hCPU, uint32 index)
{
	const uint32 value = hCPU->sprExtended.sr[index];
	return value;
}
|
||||
|
||||
// Traces and stores a new SDR1 value for this core.
static void setSDR1(PPCInterpreter_t* hCPU, uint32 newValue)
{
	auto& extended = hCPU->sprExtended;
	debug_printf("Set SDR1 to 0x%08x\n", newValue);
	extended.sdr1 = newValue;
}
|
||||
|
||||
// Write to the lower time base register. Only a reset is supported: a
// non-zero value trips assert_dbg, and in every case the whole global time
// base is set to zero.
static void setTBL(PPCInterpreter_t* hCPU, uint32 newValue)
{
	const bool isResetRequest = (newValue == 0);
	if (!isResetRequest)
	{
		assert_dbg();
	}
	debug_printf("Reset TB\n");
	hCPU->global->tb = 0;
}
|
||||
|
||||
// Write to the upper time base register. Only a reset is supported: a
// non-zero value trips assert_dbg, and in every case the whole global time
// base is set to zero.
static void setTBU(PPCInterpreter_t* hCPU, uint32 newValue)
{
	const bool isResetRequest = (newValue == 0);
	if (!isResetRequest)
	{
		assert_dbg();
	}
	debug_printf("Reset TB\n");
	hCPU->global->tb = 0;
}
|
||||
|
||||
// Supervisor-mode SPR write dispatcher.
// Routes a write of newValue to SPR number spr to the matching setter or
// register field. Writes to the performance monitor SPRs (0x3B8..0x3BE) are
// only traced. Any other SPR number is traced as unhandled and, outside of
// PUBLIC_RELEASE builds, trips assert_dbg.
static void PPCSprSupervisor_set(PPCInterpreter_t* hCPU, uint32 spr, uint32 newValue)
{
	switch (spr)
	{
	case SPR_LR:
		hCPU->spr.LR = newValue;
		break;
	case SPR_CTR:
		hCPU->spr.CTR = newValue;
		break;
	case SPR_DEC:
		setDEC(hCPU, newValue);
		break;
	case SPR_XER:
		PPCInterpreter_setXER(hCPU, newValue);
		break;
	case SPR_UGQR0:
	case SPR_UGQR1:
	case SPR_UGQR2:
	case SPR_UGQR3:
	case SPR_UGQR4:
	case SPR_UGQR5:
	case SPR_UGQR6:
	case SPR_UGQR7:
		hCPU->spr.UGQR[spr - SPR_UGQR0] = newValue;
		break;
	// values above are user mode accessible
	case SPR_TBL_WRITE: // TBL
		setTBL(hCPU, newValue);
		break;
	case SPR_TBU_WRITE: // TBU
		setTBU(hCPU, newValue);
		break;
	case SPR_FPECR:
		setFPECR(hCPU, newValue);
		break;
	case SPR_HID0:
		setHID0(hCPU, newValue);
		break;
	case SPR_HID2:
		setHID2(hCPU, newValue);
		break;
	case SPR_HID4:
		setHID4(hCPU, newValue);
		break;
	case SPR_HID5:
		setHID5(hCPU, newValue);
		break;
	case SPR_L2CR:
		setL2CR(hCPU, newValue);
		break;
	case SPR_SRR0:
		setSRR0(hCPU, newValue);
		break;
	case SPR_SRR1:
		setSRR1(hCPU, newValue);
		break;
	case SPR_SPRG0:
		setSPRG(hCPU, 0, newValue);
		break;
	case SPR_SPRG1:
		setSPRG(hCPU, 1, newValue);
		break;
	case SPR_SPRG2:
		setSPRG(hCPU, 2, newValue);
		break;
	case SPR_SPRG3:
		setSPRG(hCPU, 3, newValue);
		break;
	case SPR_SCR:
		setSCR(hCPU, newValue);
		break;
	case SPR_CAR:
		setCAR(hCPU, newValue);
		break;
	case SPR_BCR:
		setBCR(hCPU, newValue);
		break;
	case SPR_DMAU:
		setDMAU(hCPU, newValue);
		break;
	case SPR_DMAL:
		setDMAL(hCPU, newValue);
		break;
	// data BAT, upper words
	case SPR_DBATU_0:
		setDBATU(hCPU, 0, newValue);
		break;
	case SPR_DBATU_1:
		setDBATU(hCPU, 1, newValue);
		break;
	case SPR_DBATU_2:
		setDBATU(hCPU, 2, newValue);
		break;
	case SPR_DBATU_3:
		setDBATU(hCPU, 3, newValue);
		break;
	case SPR_DBATU_4:
		setDBATU(hCPU, 4, newValue);
		break;
	case SPR_DBATU_5:
		setDBATU(hCPU, 5, newValue);
		break;
	case SPR_DBATU_6:
		setDBATU(hCPU, 6, newValue);
		break;
	case SPR_DBATU_7:
		setDBATU(hCPU, 7, newValue);
		break;
	// data BAT, lower words
	case SPR_DBATL_0:
		setDBATL(hCPU, 0, newValue);
		break;
	case SPR_DBATL_1:
		setDBATL(hCPU, 1, newValue);
		break;
	case SPR_DBATL_2:
		setDBATL(hCPU, 2, newValue);
		break;
	case SPR_DBATL_3:
		setDBATL(hCPU, 3, newValue);
		break;
	case SPR_DBATL_4:
		setDBATL(hCPU, 4, newValue);
		break;
	case SPR_DBATL_5:
		setDBATL(hCPU, 5, newValue);
		break;
	case SPR_DBATL_6:
		setDBATL(hCPU, 6, newValue);
		break;
	case SPR_DBATL_7:
		setDBATL(hCPU, 7, newValue);
		break;
	// instruction BAT, upper words
	case SPR_IBATU_0:
		setIBATU(hCPU, 0, newValue);
		break;
	case SPR_IBATU_1:
		setIBATU(hCPU, 1, newValue);
		break;
	case SPR_IBATU_2:
		setIBATU(hCPU, 2, newValue);
		break;
	case SPR_IBATU_3:
		setIBATU(hCPU, 3, newValue);
		break;
	case SPR_IBATU_4:
		setIBATU(hCPU, 4, newValue);
		break;
	case SPR_IBATU_5:
		setIBATU(hCPU, 5, newValue);
		break;
	case SPR_IBATU_6:
		setIBATU(hCPU, 6, newValue);
		break;
	case SPR_IBATU_7:
		setIBATU(hCPU, 7, newValue);
		break;
	// instruction BAT, lower words
	case SPR_IBATL_0:
		setIBATL(hCPU, 0, newValue);
		break;
	case SPR_IBATL_1:
		setIBATL(hCPU, 1, newValue);
		break;
	case SPR_IBATL_2:
		setIBATL(hCPU, 2, newValue);
		break;
	case SPR_IBATL_3:
		setIBATL(hCPU, 3, newValue);
		break;
	case SPR_IBATL_4:
		setIBATL(hCPU, 4, newValue);
		break;
	case SPR_IBATL_5:
		setIBATL(hCPU, 5, newValue);
		break;
	case SPR_IBATL_6:
		setIBATL(hCPU, 6, newValue);
		break;
	case SPR_IBATL_7:
		setIBATL(hCPU, 7, newValue);
		break;
	case SPR_SDR1:
		setSDR1(hCPU, newValue);
		break;
	// performance monitor registers: writes are traced only.
	// Each message ends with a newline like every other trace in this file.
	case 0x3B8: // mmcr0
		debug_printf("Write performance monitor SPR mmcr0 0x%08x\n", newValue);
		break;
	case 0x3B9: // PMC1
		debug_printf("Write performance monitor SPR PMC1 0x%08x\n", newValue);
		break;
	case 0x3BA: // PMC2
		debug_printf("Write performance monitor SPR PMC2 0x%08x\n", newValue);
		break;
	case 0x3BC: // mmcr1
		debug_printf("Write performance monitor SPR mmcr1 0x%08x\n", newValue);
		break;
	case 0x3BD: // PMC3
		debug_printf("Write performance monitor SPR PMC3 0x%08x\n", newValue);
		break;
	case 0x3BE: // PMC4
		debug_printf("Write performance monitor SPR PMC4 0x%08x\n", newValue);
		break;
	default:
		debug_printf("[C%d] Set unhandled SPR 0x%x to %08x (supervisor mode)\n", hCPU->spr.UPIR, spr, newValue);
#ifndef PUBLIC_RELEASE
		assert_dbg();
#endif
		break;
	}
}
|
||||
|
||||
// SPR write entry point.
// When the interpreter configuration allows supervisor mode, every write is
// forwarded to PPCSprSupervisor_set. Otherwise only LR, CTR, XER and UGQR0-7
// are writable; any other SPR number is traced as unhandled and, outside of
// PUBLIC_RELEASE builds, trips assert_dbg.
static void PPCSpr_set(PPCInterpreter_t* hCPU, uint32 spr, uint32 newValue)
{
	if constexpr(ppcItpCtrl::allowSupervisorMode)
	{
		// todo - check if in supervisor mode or user mode
		PPCSprSupervisor_set(hCPU, spr, newValue);
		return;
	}

	// user-mode register set
	switch (spr)
	{
	case SPR_LR:	hCPU->spr.LR = newValue; break;
	case SPR_CTR:	hCPU->spr.CTR = newValue; break;
	case SPR_XER:	PPCInterpreter_setXER(hCPU, newValue); break;
	case SPR_UGQR0:
	case SPR_UGQR1:
	case SPR_UGQR2:
	case SPR_UGQR3:
	case SPR_UGQR4:
	case SPR_UGQR5:
	case SPR_UGQR6:
	case SPR_UGQR7:
	{
		// all eight UGQR registers share one array, indexed relative to UGQR0
		const uint32 gqrIndex = spr - SPR_UGQR0;
		hCPU->spr.UGQR[gqrIndex] = newValue;
		break;
	}
	default:
		debug_printf("[C%d] Set unhandled SPR %d to %08x\n", hCPU->spr.UPIR, spr, newValue);
#ifndef PUBLIC_RELEASE
		assert_dbg();
#endif
		break;
	}
}
|
||||
|
||||
// Supervisor-mode SPR read dispatcher.
// Returns the value of SPR number spr from the matching getter or register
// field. An unknown SPR number is traced as unhandled, trips assert_dbg
// outside of PUBLIC_RELEASE builds, and reads as zero.
static uint32 PPCSprSupervisor_get(PPCInterpreter_t* hCPU, uint32 spr)
{
	switch (spr)
	{
	case SPR_LR:		return hCPU->spr.LR;
	case SPR_CTR:		return hCPU->spr.CTR;
	case SPR_XER:		return PPCInterpreter_getXER(hCPU);
	case SPR_UPIR:		return hCPU->spr.UPIR;
	case SPR_UGQR0:
	case SPR_UGQR1:
	case SPR_UGQR2:
	case SPR_UGQR3:
	case SPR_UGQR4:
	case SPR_UGQR5:
	case SPR_UGQR6:
	case SPR_UGQR7:
		return hCPU->spr.UGQR[spr - SPR_UGQR0];
	// above are registers accessible in user mode
	case SPR_PVR:		return getPVR(hCPU);
	case SPR_HID0:		return getHID0(hCPU);
	case SPR_HID1:		return getHID1(hCPU);
	case SPR_HID2:		return getHID2(hCPU);
	case SPR_HID4:		return getHID4(hCPU);
	case SPR_HID5:		return getHID5(hCPU);
	case SPR_SCR:		return getSCR(hCPU);
	case SPR_CAR:		return getCAR(hCPU);
	case SPR_BCR:		return getBCR(hCPU);
	case SPR_DAR:		return getDAR(hCPU);
	case SPR_DSISR:		return getDSISR(hCPU);
	case SPR_L2CR:		return getL2CR(hCPU);
	case SPR_FPECR:		return getFPECR(hCPU);
	case SPR_SPRG0:		return getSPRG(hCPU, 0);
	case SPR_SPRG1:		return getSPRG(hCPU, 1);
	case SPR_SPRG2:		return getSPRG(hCPU, 2);
	case SPR_SPRG3:		return getSPRG(hCPU, 3);
	// data BAT, upper words
	case SPR_DBATU_0:	return getDBATU(hCPU, 0);
	case SPR_DBATU_1:	return getDBATU(hCPU, 1);
	case SPR_DBATU_2:	return getDBATU(hCPU, 2);
	case SPR_DBATU_3:	return getDBATU(hCPU, 3);
	case SPR_DBATU_4:	return getDBATU(hCPU, 4);
	case SPR_DBATU_5:	return getDBATU(hCPU, 5);
	case SPR_DBATU_6:	return getDBATU(hCPU, 6);
	case SPR_DBATU_7:	return getDBATU(hCPU, 7);
	// data BAT, lower words
	case SPR_DBATL_0:	return getDBATL(hCPU, 0);
	case SPR_DBATL_1:	return getDBATL(hCPU, 1);
	case SPR_DBATL_2:	return getDBATL(hCPU, 2);
	case SPR_DBATL_3:	return getDBATL(hCPU, 3);
	case SPR_DBATL_4:	return getDBATL(hCPU, 4);
	case SPR_DBATL_5:	return getDBATL(hCPU, 5);
	case SPR_DBATL_6:	return getDBATL(hCPU, 6);
	case SPR_DBATL_7:	return getDBATL(hCPU, 7);
	// instruction BAT, upper words
	case SPR_IBATU_0:	return getIBATU(hCPU, 0);
	case SPR_IBATU_1:	return getIBATU(hCPU, 1);
	case SPR_IBATU_2:	return getIBATU(hCPU, 2);
	case SPR_IBATU_3:	return getIBATU(hCPU, 3);
	case SPR_IBATU_4:	return getIBATU(hCPU, 4);
	case SPR_IBATU_5:	return getIBATU(hCPU, 5);
	case SPR_IBATU_6:	return getIBATU(hCPU, 6);
	case SPR_IBATU_7:	return getIBATU(hCPU, 7);
	// instruction BAT, lower words
	case SPR_IBATL_0:	return getIBATL(hCPU, 0);
	case SPR_IBATL_1:	return getIBATL(hCPU, 1);
	case SPR_IBATL_2:	return getIBATL(hCPU, 2);
	case SPR_IBATL_3:	return getIBATL(hCPU, 3);
	case SPR_IBATL_4:	return getIBATL(hCPU, 4);
	case SPR_IBATL_5:	return getIBATL(hCPU, 5);
	case SPR_IBATL_6:	return getIBATL(hCPU, 6);
	case SPR_IBATL_7:	return getIBATL(hCPU, 7);
	default:
		debug_printf("[C%d] Get unhandled SPR %d\n", hCPU->spr.UPIR, spr);
#ifndef PUBLIC_RELEASE
		assert_dbg();
#endif
		break;
	}
	// only reached for unhandled SPR numbers
	return 0;
}
|
||||
|
||||
// Reads a special purpose register on behalf of emulated code.
// Interpreter variants that allow supervisor mode forward every request to
// PPCSprSupervisor_get(). Otherwise only the registers listed in the switch
// are readable; any other SPR is reported via debug_printf (plus assert_dbg()
// unless PUBLIC_RELEASE is defined) and reads as 0.
static uint32 PPCSpr_get(PPCInterpreter_t* hCPU, uint32 spr)
{
	if constexpr(ppcItpCtrl::allowSupervisorMode)
	{
		// todo - check if in supervisor mode or user mode
		return PPCSprSupervisor_get(hCPU, spr);
	}

	switch (spr)
	{
	case SPR_LR:
		return hCPU->spr.LR;
	case SPR_CTR:
		return hCPU->spr.CTR;
	case SPR_XER:
		return PPCInterpreter_getXER(hCPU);
	case SPR_DEC:
	{
		// DEC is not stored; it is derived from the cycles that passed since it was set
		assert_dbg();
		const uint64 elapsedCycles = PPCInterpreter_getMainCoreCycleCounter() - ppcMainThreadDECCycleStart;
		if (elapsedCycles >= (uint64)ppcMainThreadDECCycleValue)
			return 0; // counted all the way down
		return (uint32)(ppcMainThreadDECCycleValue - elapsedCycles);
	}
	case SPR_UPIR:
		return hCPU->spr.UPIR;
	case SPR_PVR:
		// no value is supplied on this path, the read yields 0
		assert_dbg();
		return 0;
	case SPR_UGQR0:
	case SPR_UGQR1:
	case SPR_UGQR2:
	case SPR_UGQR3:
	case SPR_UGQR4:
	case SPR_UGQR5:
	case SPR_UGQR6:
	case SPR_UGQR7:
		return hCPU->spr.UGQR[spr - SPR_UGQR0];
	default:
		break;
	}

	// anything that was not handled above
	debug_printf("[C%d] Get unhandled SPR %d\n", hCPU->spr.UPIR, spr);
#ifndef PUBLIC_RELEASE
	assert_dbg();
#endif
	return 0;
}
|
75
src/Cafe/HW/Espresso/PPCCallback.h
Normal file
75
src/Cafe/HW/Espresso/PPCCallback.h
Normal file
|
@ -0,0 +1,75 @@
|
|||
#pragma once
|
||||
#include "PPCState.h"
|
||||
|
||||
// Running tally of how many argument registers PPCCoreCallback has filled so far.
struct PPCCoreCallbackData_t
{
	sint32 gprCount{0};   // general purpose registers consumed (arguments start at r3)
	sint32 floatCount{0}; // floating point registers consumed (arguments start at f1)
};
|
||||
|
||||
// callback functions
|
||||
// Recursion terminator: no arguments are left to marshal, so execute the guest
// function and return the value it left in r3.
inline uint32 PPCCoreCallback(MPTR function, const PPCCoreCallbackData_t& data)
{
	PPCInterpreter_t* const hCPU = PPCCore_executeCallbackInternal(function);
	return hCPU->gpr[3];
}
|
||||
|
||||
template <typename T, typename... TArgs>
|
||||
uint32 PPCCoreCallback(MPTR function, PPCCoreCallbackData_t& data, T currentArg, TArgs... args)
|
||||
{
|
||||
cemu_assert_debug(data.gprCount <= 8);
|
||||
cemu_assert_debug(data.floatCount <= 8);
|
||||
if constexpr (std::is_pointer_v<T>)
|
||||
{
|
||||
ppcInterpreterCurrentInstance->gpr[3 + data.gprCount] = MEMPTR(currentArg).GetMPTR();
|
||||
data.gprCount++;
|
||||
}
|
||||
else if constexpr (std::is_base_of_v<MEMPTRBase, std::remove_reference_t<T>>)
|
||||
{
|
||||
ppcInterpreterCurrentInstance->gpr[3 + data.gprCount] = currentArg.GetMPTR();
|
||||
data.gprCount++;
|
||||
}
|
||||
else if constexpr (std::is_reference_v<T>)
|
||||
{
|
||||
ppcInterpreterCurrentInstance->gpr[3 + data.gprCount] = MEMPTR(¤tArg).GetMPTR();
|
||||
data.gprCount++;
|
||||
}
|
||||
else if constexpr(std::is_enum_v<T>)
|
||||
{
|
||||
using TEnum = typename std::underlying_type<T>::type;
|
||||
return PPCCoreCallback<TEnum>(function, data, (TEnum)currentArg, std::forward(args)...);
|
||||
}
|
||||
else if constexpr (std::is_floating_point_v<T>)
|
||||
{
|
||||
ppcInterpreterCurrentInstance->fpr[1 + data.floatCount].fpr = (double)currentArg;
|
||||
data.floatCount++;
|
||||
}
|
||||
else if constexpr (std::is_integral_v<T> && sizeof(T) == sizeof(uint64))
|
||||
{
|
||||
ppcInterpreterCurrentInstance->gpr[3 + data.gprCount] = (uint32)(currentArg >> 32); // high
|
||||
ppcInterpreterCurrentInstance->gpr[3 + data.gprCount + 1] = (uint32)currentArg; // low
|
||||
|
||||
data.gprCount += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
ppcInterpreterCurrentInstance->gpr[3 + data.gprCount] = (uint32)currentArg;
|
||||
data.gprCount++;
|
||||
}
|
||||
|
||||
return PPCCoreCallback(function, data, args...);
|
||||
}
|
||||
|
||||
template <typename... TArgs>
|
||||
uint32 PPCCoreCallback(MPTR function, TArgs... args)
|
||||
{
|
||||
PPCCoreCallbackData_t data{};
|
||||
return PPCCoreCallback(function, data, std::forward<TArgs>(args)...);
|
||||
}
|
||||
|
||||
template <typename... TArgs>
|
||||
uint32 PPCCoreCallback(void* functionPtr, TArgs... args)
|
||||
{
|
||||
MEMPTR<void> _tmp{ functionPtr };
|
||||
PPCCoreCallbackData_t data{};
|
||||
return PPCCoreCallback(_tmp.GetMPTR(), data, std::forward<TArgs>(args)...);
|
||||
}
|
115
src/Cafe/HW/Espresso/PPCScheduler.cpp
Normal file
115
src/Cafe/HW/Espresso/PPCScheduler.cpp
Normal file
|
@ -0,0 +1,115 @@
|
|||
#include "Cafe/OS/libs/gx2/GX2.h"
|
||||
#include "Cafe/HW/Latte/Core/Latte.h"
|
||||
#include "Cafe/OS/libs/coreinit/coreinit_Alarm.h"
|
||||
#include "Cafe/OS/libs/coreinit/coreinit_Thread.h"
|
||||
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
|
||||
|
||||
#include "Cafe/HW/Espresso/Recompiler/PPCRecompiler.h"
|
||||
#include "Cafe/CafeSystem.h"
|
||||
|
||||
uint32 ppcThreadQuantum = 45000; // execute 45000 instructions before thread reschedule happens, this value can be overwritten by game profiles
|
||||
|
||||
void PPCInterpreter_relinquishTimeslice()
|
||||
{
|
||||
if( ppcInterpreterCurrentInstance->remainingCycles >= 0 )
|
||||
{
|
||||
ppcInterpreterCurrentInstance->skippedCycles = ppcInterpreterCurrentInstance->remainingCycles + 1;
|
||||
ppcInterpreterCurrentInstance->remainingCycles = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Adds numCycles to the remaining cycle budget of the current instance.
void PPCCore_boostQuantum(sint32 numCycles)
{
	PPCInterpreter_t* const hCPU = ppcInterpreterCurrentInstance;
	hCPU->remainingCycles = hCPU->remainingCycles + numCycles;
}
|
||||
|
||||
// Removes numCycles from the remaining cycle budget of the current instance.
void PPCCore_deboostQuantum(sint32 numCycles)
{
	PPCInterpreter_t* const hCPU = ppcInterpreterCurrentInstance;
	hCPU->remainingCycles = hCPU->remainingCycles - numCycles;
}
|
||||
|
||||
namespace coreinit
|
||||
{
|
||||
void __OSThreadSwitchToNext();
|
||||
}
|
||||
|
||||
void PPCCore_switchToScheduler()
|
||||
{
|
||||
cemu_assert_debug(__OSHasSchedulerLock() == false); // scheduler lock must not be hold past thread time slice
|
||||
cemu_assert_debug(ppcInterpreterCurrentInstance->coreInterruptMask != 0 || CafeSystem::GetForegroundTitleId() == 0x000500001019e600);
|
||||
__OSLockScheduler();
|
||||
coreinit::__OSThreadSwitchToNext();
|
||||
__OSUnlockScheduler();
|
||||
}
|
||||
|
||||
void PPCCore_switchToSchedulerWithLock()
|
||||
{
|
||||
cemu_assert_debug(__OSHasSchedulerLock() == true); // scheduler lock must be hold
|
||||
cemu_assert_debug(ppcInterpreterCurrentInstance->coreInterruptMask != 0 || CafeSystem::GetForegroundTitleId() == 0x000500001019e600);
|
||||
coreinit::__OSThreadSwitchToNext();
|
||||
}
|
||||
|
||||
// Return target installed in LR by PPCCore_executeCallbackInternal. Clearing
// the instruction pointer is the "callback finished" signal that function
// polls for; giving up the time slice makes its interpreter loop stop.
void _PPCCore_callbackExit(PPCInterpreter_t* hCPU)
{
	hCPU->instructionPointer = 0;
	PPCInterpreter_relinquishTimeslice();
}
|
||||
|
||||
// Runs the guest function at functionMPTR on the current interpreter instance
// and returns that instance once the function has returned.
//
// LR is pointed at _PPCCore_callbackExit, which zeroes the instruction pointer;
// an instruction pointer of 0 is therefore the completion signal. LR, the
// instruction pointer and the stack pointer are restored before returning.
PPCInterpreter_t* PPCCore_executeCallbackInternal(uint32 functionMPTR)
{
	cemu_assert_debug(functionMPTR != 0);
	PPCInterpreter_t* const hCPU = ppcInterpreterCurrentInstance;

	// state that has to survive the nested call
	const uint32 savedLR = hCPU->spr.LR;
	const uint32 savedIP = hCPU->instructionPointer;

	// reserve a save area on the guest stack
	hCPU->gpr[1] -= 16 * 4;
	// returning from the callback lands in _PPCCore_callbackExit
	hCPU->spr.LR = PPCInterpreter_makeCallableExportDepr(_PPCCore_callbackExit);
	hCPU->instructionPointer = functionMPTR;

	for (;;)
	{
		// fresh quantum for every round
		hCPU->remainingCycles = ppcThreadQuantum;
		hCPU->skippedCycles = 0;
		if (hCPU->remainingCycles > 0)
		{
			// try to enter recompiler immediately
			PPCRecompiler_attemptEnter(hCPU, hCPU->instructionPointer);
			// execute any remaining instructions in interpreter
			while ((--hCPU->remainingCycles) >= 0)
				PPCInterpreterSlim_executeInstruction(hCPU);
		}
		if (hCPU->instructionPointer == 0)
			break; // callback returned
		// quantum used up without finishing, let other threads run
		coreinit::OSYieldThread();
	}

	// hand back the cycles that were parked when the time slice was relinquished
	hCPU->remainingCycles += hCPU->skippedCycles;
	hCPU->skippedCycles = 0;

	// release the save area and restore the caller's state
	hCPU->gpr[1] += 16 * 4;
	hCPU->spr.LR = savedLR;
	hCPU->instructionPointer = savedIP;
	return hCPU;
}
|
||||
|
||||
// Exported wrapper around PPCCore_executeCallbackInternal that discards the
// returned interpreter instance.
__declspec(dllexport) void PPCCore_executeCallback(uint32 functionMPTR)
{
	(void)PPCCore_executeCallbackInternal(functionMPTR);
}
|
||||
|
||||
void PPCCore_deleteAllThreads()
|
||||
{
|
||||
assert_dbg();
|
||||
}
|
||||
|
||||
// Intentionally empty: there is currently nothing to initialize here.
void PPCCore_init()
{
}
|
245
src/Cafe/HW/Espresso/PPCSchedulerLLE.cpp
Normal file
245
src/Cafe/HW/Espresso/PPCSchedulerLLE.cpp
Normal file
|
@ -0,0 +1,245 @@
|
|||
|
||||
struct PPCInterpreterLLEContext_t
|
||||
{
|
||||
uint8 padding[1024 * 128]; // reserved memory for stack (for recompiler mode)
|
||||
PPCInterpreter_t cores[3];
|
||||
};
|
||||
|
||||
PPCInterpreterGlobal_t globalCPUState = { 0 };
|
||||
|
||||
// Prepares one core: attaches the shared global CPU state and stores the
// core's index in UPIR.
void PPCCoreLLE_initCore(PPCInterpreter_t* hCPU, uint32 coreIndex)
{
	hCPU->global = &globalCPUState;
	hCPU->spr.UPIR = coreIndex;
}
|
||||
|
||||
#define SCR_C2 (0x200000) // enable core 2
|
||||
#define SCR_C1 (0x400000) // enable core 1
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32be ukn000;
|
||||
uint32be ukn004;
|
||||
uint32be ukn008;
|
||||
uint32be ukn00C;
|
||||
uint32be ukn010;
|
||||
uint32be ukn014;
|
||||
uint32be busFreq;
|
||||
uint32be ukn01C;
|
||||
uint32be ukn020[4];
|
||||
uint32be ukn030[4];
|
||||
uint32be ukn040[4];
|
||||
uint32be ukn050[4];
|
||||
uint32be ukn060[4];
|
||||
uint32be ukn070[4];
|
||||
uint32be ukn080[4];
|
||||
uint32be ukn090[4];
|
||||
uint32be ukn0A0[4];
|
||||
uint32be ukn0B0[4];
|
||||
uint32be ukn0C0;
|
||||
struct
|
||||
{
|
||||
uint32be id;
|
||||
uint32be baseAddress;
|
||||
uint32be size;
|
||||
}ramInfo[3];
|
||||
uint32 ukn0E8;
|
||||
uint32 ukn0EC;
|
||||
uint32 ukn0F0[4];
|
||||
uint32 ukn100[8];
|
||||
uint32 ukn120[8];
|
||||
uint32 ukn140[8];
|
||||
uint32 ukn160[8];
|
||||
uint32 ukn180[8];
|
||||
uint32 ukn1A0[8];
|
||||
uint32 ukn1C0[8];
|
||||
uint32 ukn1E0[8];
|
||||
uint32 ukn200[8];
|
||||
uint32 ukn220[8];
|
||||
uint32 ukn240[8];
|
||||
uint32 ukn260[8];
|
||||
uint32 ukn280[8];
|
||||
uint32 ukn2A0[8];
|
||||
uint32 ukn2C0[8];
|
||||
uint32 ukn2E0[8];
|
||||
uint32 ukn300[8];
|
||||
uint32 ukn320[8];
|
||||
uint32 ukn340[8];
|
||||
uint32 ukn360[8];
|
||||
uint32 ukn380[8];
|
||||
uint32be ukn3A0;
|
||||
uint32be ukn3A4;
|
||||
uint32be ukn3A8;
|
||||
uint32be ukn3AC;
|
||||
uint32be ukn3B0;
|
||||
uint32be smdpAreaPtr;
|
||||
uint32be ukn3B8;
|
||||
uint32be ukn3BC;
|
||||
uint32 ukn3C0[8];
|
||||
uint32 ukn3E0[8];
|
||||
uint32 ukn400;
|
||||
uint32 ukn404;
|
||||
uint32 ukn408;
|
||||
}ppcBootParamBlock_t; // for kernel 5.5.2
|
||||
|
||||
static_assert(offsetof(ppcBootParamBlock_t, ramInfo) == 0xC4, "");
|
||||
static_assert(offsetof(ppcBootParamBlock_t, busFreq) == 0x18, "");
|
||||
static_assert(offsetof(ppcBootParamBlock_t, smdpAreaPtr) == 0x3B4, "");
|
||||
static_assert(offsetof(ppcBootParamBlock_t, ukn400) == 0x400, "");
|
||||
|
||||
void PPCCoreLLE_setupBootParamBlock()
|
||||
{
|
||||
ppcBootParamBlock_t* bootParamBlock = (ppcBootParamBlock_t*)memory_getPointerFromPhysicalOffset(0x01FFF000);
|
||||
memset(bootParamBlock, 0, sizeof(ppcBootParamBlock_t));
|
||||
|
||||
// setup RAM info
|
||||
//PPCBaseAddress 0x8000000 0x00000000 0x28000000
|
||||
//PPCSize 0x120000 0x2000000 0xA8000000
|
||||
|
||||
bootParamBlock->ukn004 = 0x40C;
|
||||
|
||||
bootParamBlock->busFreq = ESPRESSO_BUS_CLOCK;
|
||||
|
||||
bootParamBlock->ramInfo[0].id = 0;
|
||||
bootParamBlock->ramInfo[0].baseAddress = 0x8000000;
|
||||
bootParamBlock->ramInfo[0].size = 0x120000;
|
||||
bootParamBlock->ramInfo[1].id = 1;
|
||||
bootParamBlock->ramInfo[1].baseAddress = 0x00000000;
|
||||
bootParamBlock->ramInfo[1].size = 0x2000000;
|
||||
bootParamBlock->ramInfo[2].id = 2;
|
||||
bootParamBlock->ramInfo[2].baseAddress = 0x28000000;
|
||||
bootParamBlock->ramInfo[2].size = 0xA8000000;
|
||||
|
||||
}
|
||||
typedef struct
|
||||
{
|
||||
uint32be magic;
|
||||
uint32be count;
|
||||
uint32 _padding08[14];
|
||||
/* +0x0040 */ uint32be commandsReadIndex; // written by IOSU
|
||||
uint32 _padding44[15];
|
||||
/* +0x0080 */ uint32be commandsWriteIndex;
|
||||
uint32 _padding84[15];
|
||||
/* +0x00C0 */ uint32be resultsReadIndex;
|
||||
uint32 _paddingC4[15];
|
||||
/* +0x0100 */ uint32be resultsWriteIndex; // written by IOSU
|
||||
uint32 _padding104[15];
|
||||
/* +0x0140 */ uint32be commandPtrs[0xC00];
|
||||
/* +0x3140 */ uint32be resultPtrs[0xC00];
|
||||
}smdpArea_t;
|
||||
|
||||
static_assert(offsetof(smdpArea_t, commandsReadIndex) == 0x0040, "");
|
||||
static_assert(offsetof(smdpArea_t, commandsWriteIndex) == 0x0080, "");
|
||||
static_assert(offsetof(smdpArea_t, resultsReadIndex) == 0x00C0, "");
|
||||
static_assert(offsetof(smdpArea_t, resultsWriteIndex) == 0x0100, "");
|
||||
static_assert(offsetof(smdpArea_t, resultPtrs) == 0x3140, "");
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32be type;
|
||||
uint32be ukn04;
|
||||
uint32be ukn08;
|
||||
uint32be ukn0C;
|
||||
uint32be ukn10;
|
||||
uint32be ukn14;
|
||||
uint32be ukn18;
|
||||
uint32be ukn1C;
|
||||
uint32be ukn20;
|
||||
uint32be ukn24;
|
||||
uint32be ukn28;
|
||||
uint32be ukn2C;
|
||||
}smdpCommand_t;
|
||||
|
||||
// Appends `result` to the result ring of `smdpArea` and advances the write
// index, wrapping at smdpArea->count.
//
// Both the write index and count are read from the shared area, so they are
// validated first: a count of 0 would make the modulo divide by zero and an
// index beyond the array would write outside resultPtrs. In either case the
// result is dropped after a debug assertion.
void smdpArea_pushResult(smdpArea_t* smdpArea, MPTR result)
{
	const uint32 ringSize = (uint32)smdpArea->count;
	const uint32 writeIndex = (uint32)smdpArea->resultsWriteIndex;
	const uint32 capacity = (uint32)(sizeof(smdpArea->resultPtrs) / sizeof(smdpArea->resultPtrs[0]));
	if (ringSize == 0 || writeIndex >= capacity)
	{
		assert_dbg();
		return;
	}
	smdpArea->resultPtrs[writeIndex] = result;
	smdpArea->resultsWriteIndex = (writeIndex + 1) % ringSize;
}
|
||||
|
||||
// Handles one SMDP command. Only type 1 is known: it gets ukn08 set to 1 and
// is pushed back as a result. Any other type raises a debug assertion.
void smdpArea_processCommand(smdpArea_t* smdpArea, smdpCommand_t* cmd)
{
	if (cmd->type != 1)
	{
		assert_dbg();
		return;
	}
	cmd->ukn08 = 1;
	// cmd->ukn2C ?
	forceLogDebug_printf("SMDP command received - todo");
	smdpArea_pushResult(smdpArea, memory_getVirtualOffsetFromPointer(cmd));
}
|
||||
|
||||
void smdpArea_thread()
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
ppcBootParamBlock_t* bootParamBlock = (ppcBootParamBlock_t*)memory_getPointerFromPhysicalOffset(0x01FFF000);
|
||||
if(bootParamBlock->smdpAreaPtr != MPTR_NULL)
|
||||
{
|
||||
smdpArea_t* smdpArea = (smdpArea_t*)memory_getPointerFromPhysicalOffset(bootParamBlock->smdpAreaPtr);
|
||||
if (smdpArea->magic == 'smdp')
|
||||
{
|
||||
uint32 cmdReadIndex = smdpArea->commandsReadIndex;
|
||||
uint32 cmdWriteIndex = smdpArea->commandsWriteIndex;
|
||||
if (cmdReadIndex != cmdWriteIndex)
|
||||
{
|
||||
// new command
|
||||
smdpArea_processCommand(smdpArea, (smdpCommand_t*)memory_getPointerFromPhysicalOffset(smdpArea->commandPtrs[cmdReadIndex]));
|
||||
// increment read counter
|
||||
cmdReadIndex = (cmdReadIndex + 1) % (uint32)smdpArea->count;
|
||||
smdpArea->commandsReadIndex = cmdReadIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the three emulated cores round-robin on the calling thread, 10000
// interpreter steps per core and turn. Core 0 starts at `entrypoint`, cores 1
// and 2 at 0xFFF00100 and only once their enable bit is set in SCR.
// Also starts the detached SMDP polling thread. Under normal operation this
// function never returns; it returns early only if the context allocation fails.
void PPCCoreLLE_startSingleCoreScheduler(uint32 entrypoint)
{
	PPCInterpreterLLEContext_t* cpuContext = (PPCInterpreterLLEContext_t*)malloc(sizeof(PPCInterpreterLLEContext_t));
	if (cpuContext == nullptr)
	{
		// without the context nothing can run; do not hand a null pointer to memset
		assert_dbg();
		return;
	}
	memset(cpuContext, 0, sizeof(PPCInterpreterLLEContext_t));

	PPCCoreLLE_setupBootParamBlock();

	PPCCoreLLE_initCore(cpuContext->cores + 0, 0);
	PPCCoreLLE_initCore(cpuContext->cores + 1, 1);
	PPCCoreLLE_initCore(cpuContext->cores + 2, 2);

	cpuContext->cores[0].instructionPointer = entrypoint;
	cpuContext->cores[1].instructionPointer = 0xFFF00100;
	cpuContext->cores[2].instructionPointer = 0xFFF00100;
	// todo - calculate instruction pointer when core 1/2 is enabled (because entry point is determined by MSR exception vector bit)
	std::thread(smdpArea_thread).detach();

	while (true)
	{
		for (uint32 coreIndex = 0; coreIndex < 3; coreIndex++)
		{
			PPCInterpreter_t* hCPU = cpuContext->cores+coreIndex;
			ppcInterpreterCurrentInstance = hCPU;
			if (coreIndex == 1)
			{
				// check SCR core 1 enable bit
				if ((globalCPUState.sprGlobal.scr&SCR_C1) == 0)
					continue;
			}
			else if (coreIndex == 2)
			{
				// check SCR core 2 enable bit
				if ((globalCPUState.sprGlobal.scr&SCR_C2) == 0)
					continue;
			}

			hCPU->remainingCycles = 10000;
			while ((--hCPU->remainingCycles) >= 0)
			{
				PPCInterpreterFull_executeInstruction(hCPU);
			}
		}
	}
	assert_dbg();
}
|
255
src/Cafe/HW/Espresso/PPCState.h
Normal file
255
src/Cafe/HW/Espresso/PPCState.h
Normal file
|
@ -0,0 +1,255 @@
|
|||
#pragma once
|
||||
#include "Cafe/HW/MMU/MMU.h"
|
||||
|
||||
// CPU exception kinds (values spelled out; they match the implicit numbering)
enum
{
	CPUException_NOTHING = 0,
	CPUException_FPUUNAVAILABLE = 1,
	CPUException_EXTERNAL = 2,
	CPUException_SYSTEMCALL = 3
};
|
||||
|
||||
#define PPC_LWARX_RESERVATION_MAX (4)
|
||||
|
||||
// One floating point register: two doubles (fp0 / fp1) that can also be
// viewed as raw 64-bit integers. `fpr` and `guint` alias the first half.
union FPR_t
{
	double fpr;
	struct { double fp0; double fp1; };
	struct { uint64 guint; };
	struct { uint64 fp0int; uint64 fp1int; };
};
|
||||
|
||||
// CPU state that is shared between cores (each PPCInterpreter_t points at one instance).
struct PPCInterpreterGlobal_t
{
	struct
	{
		uint32 scr;
		uint32 car;
		//uint32 bcr;
	} sprGlobal;
	uint64 tb;
};
|
||||
|
||||
struct PPCInterpreter_t
|
||||
{
|
||||
uint32 instructionPointer;
|
||||
uint32 gpr[32];
|
||||
FPR_t fpr[32];
|
||||
uint32 fpscr;
|
||||
uint8 cr[32]; // 0 -> bit not set, 1 -> bit set (upper 7 bits of each byte must always be zero) (cr0 starts at index 0, cr1 at index 4 ..)
|
||||
uint8 xer_ca; // carry from xer
|
||||
uint8 LSQE;
|
||||
uint8 PSE;
|
||||
// thread remaining cycles
|
||||
sint32 remainingCycles; // if this value goes below zero, the next thread is scheduled
|
||||
sint32 skippedCycles; // number of skipped cycles
|
||||
struct
|
||||
{
|
||||
uint32 LR;
|
||||
uint32 CTR;
|
||||
uint32 XER;
|
||||
uint32 UPIR;
|
||||
uint32 UGQR[8];
|
||||
}spr;
|
||||
// LWARX and STWCX
|
||||
uint32 reservedMemAddr;
|
||||
uint32 reservedMemValue;
|
||||
/* Note: Everything above is potentially hardcoded into Cemuhook. Do not touch anything or it will risk breaking compatibility */
|
||||
// temporary storage for recompiler
|
||||
FPR_t temporaryFPR[8];
|
||||
uint32 temporaryGPR[4];
|
||||
// values below this are not used by Cafe OS usermode
|
||||
struct
|
||||
{
|
||||
uint32 fpecr; // is this the same register as fpscr ?
|
||||
uint32 DEC;
|
||||
uint32 srr0;
|
||||
uint32 srr1;
|
||||
uint32 PVR;
|
||||
uint32 msr;
|
||||
uint32 sprg[4];
|
||||
// DSI/ISI
|
||||
uint32 dar;
|
||||
uint32 dsisr;
|
||||
// DMA
|
||||
uint32 dmaU;
|
||||
uint32 dmaL;
|
||||
// MMU
|
||||
uint32 dbatU[8];
|
||||
uint32 dbatL[8];
|
||||
uint32 ibatU[8];
|
||||
uint32 ibatL[8];
|
||||
uint32 sr[16];
|
||||
uint32 sdr1;
|
||||
}sprExtended;
|
||||
// global CPU values
|
||||
PPCInterpreterGlobal_t* global;
|
||||
// interpreter control
|
||||
bool memoryException;
|
||||
// core context (starts at 0xFFFFFF00?)
|
||||
/* 0xFFFFFFE4 */ uint32 coreInterruptMask;
|
||||
|
||||
// extra variables for recompiler
|
||||
void* rspTemp;
|
||||
};
|
||||
|
||||
// parameter access (legacy C style)
|
||||
|
||||
// Returns the index-th 32-bit call parameter. The first eight come from
// r3..r10; later ones are read from guest memory at r1 + 8, four bytes apart.
static uint32 PPCInterpreter_getCallParamU32(PPCInterpreter_t* hCPU, uint32 index)
{
	if (index < 8)
		return hCPU->gpr[3 + index];
	const uint32 stackSlot = index - 8;
	return memory_readU32(hCPU->gpr[1] + 8 + stackSlot * 4);
}
|
||||
|
||||
// Returns a 64-bit call parameter assembled from two consecutive 32-bit
// parameters: `index` supplies the upper word, `index + 1` the lower word.
static uint64 PPCInterpreter_getCallParamU64(PPCInterpreter_t* hCPU, uint32 index)
{
	const uint64 upperWord = PPCInterpreter_getCallParamU32(hCPU, index);
	const uint64 lowerWord = PPCInterpreter_getCallParamU32(hCPU, index + 1);
	return (upperWord << 32ULL) | lowerWord;
}
|
||||
|
||||
#define ppcGetCallParamU32(__index) PPCInterpreter_getCallParamU32(hCPU, __index)
|
||||
#define ppcGetCallParamU16(__index) ((uint16)(PPCInterpreter_getCallParamU32(hCPU, __index)&0xFFFF))
|
||||
#define ppcGetCallParamU8(__index) ((uint8)(PPCInterpreter_getCallParamU32(hCPU, __index)&0xFF))
|
||||
#define ppcGetCallParamStruct(__index, __type) ((__type*)memory_getPointerFromVirtualOffsetAllowNull(PPCInterpreter_getCallParamU32(hCPU, __index)))
|
||||
|
||||
// legacy way of accessing parameters
|
||||
#define ppcDefineParamU32(__name, __index) uint32 __name = PPCInterpreter_getCallParamU32(hCPU, __index)
|
||||
#define ppcDefineParamU16(__name, __index) uint16 __name = (uint16)PPCInterpreter_getCallParamU32(hCPU, __index)
|
||||
#define ppcDefineParamU32BEPtr(__name, __index) uint32be* __name = (uint32be*)((uint8*)memory_getPointerFromVirtualOffsetAllowNull(PPCInterpreter_getCallParamU32(hCPU, __index)))
|
||||
#define ppcDefineParamS32(__name, __index) sint32 __name = (sint32)PPCInterpreter_getCallParamU32(hCPU, __index)
|
||||
#define ppcDefineParamU64(__name, __index) uint64 __name = PPCInterpreter_getCallParamU64(hCPU, __index)
|
||||
#define ppcDefineParamMPTR(__name, __index) MPTR __name = (MPTR)PPCInterpreter_getCallParamU32(hCPU, __index)
|
||||
#define ppcDefineParamMEMPTR(__name, __type, __index) MEMPTR<__type> __name{PPCInterpreter_getCallParamU32(hCPU, __index)}
|
||||
#define ppcDefineParamU8(__name, __index) uint8 __name = (PPCInterpreter_getCallParamU32(hCPU, __index)&0xFF)
|
||||
#define ppcDefineParamStructPtr(__name, __type, __index) __type* __name = ((__type*)memory_getPointerFromVirtualOffsetAllowNull(PPCInterpreter_getCallParamU32(hCPU, __index)))
|
||||
#define ppcDefineParamTypePtr(__name, __type, __index) __type* __name = ((__type*)memory_getPointerFromVirtualOffsetAllowNull(PPCInterpreter_getCallParamU32(hCPU, __index)))
|
||||
#define ppcDefineParamPtr(__name, __type, __index) __type* __name = ((__type*)memory_getPointerFromVirtualOffsetAllowNull(PPCInterpreter_getCallParamU32(hCPU, __index)))
|
||||
#define ppcDefineParamStr(__name, __index) char* __name = ((char*)memory_getPointerFromVirtualOffsetAllowNull(PPCInterpreter_getCallParamU32(hCPU, __index)))
|
||||
#define ppcDefineParamUStr(__name, __index) uint8* __name = ((uint8*)memory_getPointerFromVirtualOffsetAllowNull(PPCInterpreter_getCallParamU32(hCPU, __index)))
|
||||
#define ppcDefineParamWStr(__name, __index) wchar_t* __name = ((wchar_t*)memory_getPointerFromVirtualOffsetAllowNull(PPCInterpreter_getCallParamU32(hCPU, __index)))
|
||||
#define ppcDefineParamWStrBE(__name, __index) uint16be* __name = ((uint16be*)memory_getPointerFromVirtualOffsetAllowNull(PPCInterpreter_getCallParamU32(hCPU, __index)))
|
||||
|
||||
// GPR constants
|
||||
|
||||
#define GPR_SP 1
|
||||
|
||||
// interpreter functions
|
||||
|
||||
PPCInterpreter_t* PPCInterpreter_createInstance(unsigned int Entrypoint);
|
||||
PPCInterpreter_t* PPCInterpreter_getCurrentInstance();
|
||||
|
||||
uint64 PPCInterpreter_getMainCoreCycleCounter();
|
||||
|
||||
void PPCInterpreter_nextInstruction(PPCInterpreter_t* cpuInterpreter);
|
||||
void PPCInterpreter_jumpToInstruction(PPCInterpreter_t* cpuInterpreter, uint32 newIP);
|
||||
|
||||
void PPCInterpreterSlim_executeInstruction(PPCInterpreter_t* hCPU);
|
||||
void PPCInterpreterFull_executeInstruction(PPCInterpreter_t* hCPU);
|
||||
|
||||
// misc
|
||||
|
||||
uint32 PPCInterpreter_getXER(PPCInterpreter_t* hCPU);
|
||||
void PPCInterpreter_setXER(PPCInterpreter_t* hCPU, uint32 v);
|
||||
|
||||
// Wii U clocks (deprecated. Moved to Espresso/Const.h)
|
||||
#define ESPRESSO_CORE_CLOCK 1243125000
|
||||
#define ESPRESSO_BUS_CLOCK 248625000
|
||||
#define ESPRESSO_TIMER_CLOCK (ESPRESSO_BUS_CLOCK/4) // 62156250
|
||||
|
||||
#define ESPRESSO_CORE_CLOCK_TO_TIMER_CLOCK(__cc) ((__cc)/20ULL)
|
||||
|
||||
// interrupt vectors
|
||||
#define CPU_EXCEPTION_DSI 0x00000300
|
||||
#define CPU_EXCEPTION_INTERRUPT 0x00000500 // todo: validate
|
||||
#define CPU_EXCEPTION_FPUUNAVAIL 0x00000800 // todo: validate
|
||||
#define CPU_EXCEPTION_SYSTEMCALL 0x00000C00 // todo: validate
|
||||
#define CPU_EXCEPTION_DECREMENTER 0x00000900 // todo: validate
|
||||
|
||||
// FPU available check
|
||||
//#define FPUCheckAvailable() if ((hCPU->msr & MSR_FP) == 0) { IPTException(hCPU, CPU_EXCEPTION_FPUUNAVAIL); return; }
|
||||
#define FPUCheckAvailable() // since the emulated code always runs in usermode we can assume that MSR_FP is always set
|
||||
|
||||
// spr
|
||||
void PPCSpr_set(PPCInterpreter_t* hCPU, uint32 spr, uint32 newValue);
|
||||
uint32 PPCSpr_get(PPCInterpreter_t* hCPU, uint32 spr);
|
||||
|
||||
uint32 PPCInterpreter_getCoreIndex(PPCInterpreter_t* hCPU);
|
||||
uint32 PPCInterpreter_getCurrentCoreIndex();
|
||||
|
||||
// decrement register
|
||||
void PPCInterpreter_setDEC(PPCInterpreter_t* hCPU, uint32 newValue);
|
||||
|
||||
// timing for main processor
|
||||
extern volatile uint64 ppcMainThreadCycleCounter;
|
||||
extern uint64 ppcCyclesSince2000; // on init this is set to the cycles that passed since 1.1.2000
|
||||
extern uint64 ppcCyclesSince2000TimerClock; // on init this is set to the cycles that passed since 1.1.2000 / 20
|
||||
extern uint64 ppcCyclesSince2000_UTC;
|
||||
extern uint64 ppcMainThreadDECCycleValue; // value that was set to dec register
|
||||
extern uint64 ppcMainThreadDECCycleStart; // at which cycle the dec register was set
|
||||
|
||||
// PPC timer
|
||||
void PPCTimer_init();
|
||||
void PPCTimer_waitForInit();
|
||||
uint64 PPCTimer_getFromRDTSC();
|
||||
bool PPCTimer_hasInvariantRDTSCSupport();
|
||||
|
||||
uint64 PPCTimer_microsecondsToTsc(uint64 us);
|
||||
uint64 PPCTimer_tscToMicroseconds(uint64 us);
|
||||
uint64 PPCTimer_getRawTsc();
|
||||
|
||||
void PPCTimer_start();
|
||||
|
||||
// core info and control
|
||||
extern uint32 ppcThreadQuantum;
|
||||
|
||||
extern thread_local PPCInterpreter_t *ppcInterpreterCurrentInstance;
|
||||
uint8* PPCInterpreterGetAndModifyStackPointer(sint32 offset);
|
||||
uint8* PPCInterpreterGetStackPointer();
|
||||
void PPCInterpreterModifyStackPointer(sint32 offset);
|
||||
|
||||
uint32 PPCInterpreter_makeCallableExportDepr(void (*ppcCallableExport)(PPCInterpreter_t* hCPU));
|
||||
|
||||
// Returns `f` unchanged.
//
// NOTE(review): despite the name, no flushing takes place - the previous
// implementation only copied the bit pattern through a uint32 and back, which
// yields the same value. That pass-through behaviour is kept so callers see no
// difference; the pointer-cast type punning (a strict-aliasing violation) is gone.
static inline float flushDenormalToZero(float f)
{
	return f;
}
|
||||
|
||||
// HLE interface
|
||||
|
||||
typedef void(*HLECALL)(PPCInterpreter_t* hCPU);
|
||||
|
||||
typedef sint32 HLEIDX;
|
||||
HLEIDX PPCInterpreter_registerHLECall(HLECALL hleCall);
|
||||
HLECALL PPCInterpreter_getHLECall(HLEIDX funcIndex);
|
||||
|
||||
// HLE scheduler
|
||||
|
||||
void PPCCore_deleteAllThreads();
|
||||
void PPCInterpreter_relinquishTimeslice();
|
||||
|
||||
void PPCCore_boostQuantum(sint32 numCycles);
|
||||
void PPCCore_deboostQuantum(sint32 numCycles);
|
||||
|
||||
void PPCCore_switchToScheduler();
|
||||
void PPCCore_switchToSchedulerWithLock();
|
||||
|
||||
PPCInterpreter_t* PPCCore_executeCallbackInternal(uint32 functionMPTR);
|
||||
void PPCCore_init();
|
||||
|
||||
// LLE scheduler
|
||||
|
||||
void PPCCoreLLE_startSingleCoreScheduler(uint32 entrypoint);
|
194
src/Cafe/HW/Espresso/PPCTimer.cpp
Normal file
194
src/Cafe/HW/Espresso/PPCTimer.cpp
Normal file
|
@ -0,0 +1,194 @@
|
|||
#include "Cafe/HW/Espresso/Const.h"
|
||||
#include <immintrin.h>
|
||||
#include "asm/x64util.h"
|
||||
#include "config/ActiveSettings.h"
|
||||
#include "util/helpers/fspinlock.h"
|
||||
#include "util/highresolutiontimer/HighResolutionTimer.h"
|
||||
|
||||
#if BOOST_OS_LINUX > 0
|
||||
static __inline__
|
||||
unsigned __int64 _umul128(unsigned __int64,
|
||||
unsigned __int64,
|
||||
unsigned __int64*);
|
||||
#endif
|
||||
|
||||
uint64 _rdtscLastMeasure = 0;
|
||||
uint64 _rdtscFrequency = 0;
|
||||
|
||||
// 128-bit unsigned value split into two 64-bit halves.
struct uint128_t
{
	uint64 low;  // lower 64 bits
	uint64 high; // upper 64 bits
};
|
||||
|
||||
static_assert(sizeof(uint128_t) == 16);
|
||||
|
||||
uint128_t _rdtscAcc{};
|
||||
|
||||
#pragma intrinsic(__rdtsc)
|
||||
|
||||
// Computes floor(a * b / d) without forming the full product a * b.
// With a = qa*d + ra and b = qb*d + rb the quotient splits into
//   qa*b + ra*qb + (ra*rb)/d
// which keeps the intermediate values small. d must not be 0.
uint64 muldiv64(uint64 a, uint64 b, uint64 d)
{
	const uint64 aQuot = a / d;
	const uint64 aRem = a % d;
	const uint64 bQuot = b / d;
	const uint64 bRem = b % d;

	uint64 result = aQuot * b;
	result += aRem * bQuot;
	result += (aRem * bRem) / d;
	return result;
}
|
||||
|
||||
bool PPCTimer_hasInvariantRDTSCSupport()
|
||||
{
|
||||
uint32 cpuv[4];
|
||||
__cpuid((int*)cpuv, 0x80000007);
|
||||
return ((cpuv[3] >> 8) & 1);
|
||||
}
|
||||
|
||||
// Estimates the TSC frequency by comparing the TSC against the high resolution
// timer over a window of roughly three seconds. Blocks the calling thread for
// that long. Logs a message when the CPU does not report an invariant TSC.
uint64 PPCTimer_estimateRDTSCFrequency()
{
	if (!PPCTimer_hasInvariantRDTSCSupport())
		forceLog_printf("Invariant TSC not supported");

	// opening sample: TSC first, then the reference clock
	_mm_mfence();
	unsigned __int64 tscBegin = __rdtsc();
	unsigned int waitBeginMs = GetTickCount();
	HRTick tickBegin = HighResolutionTimer::now().getTick();

	// wait roughly 3 seconds, polling in 10ms steps
	while ((GetTickCount() - waitBeginMs) < 3000)
		std::this_thread::sleep_for(std::chrono::milliseconds(10));

	// closing sample: reference clock first, then the TSC
	_mm_mfence();
	HRTick tickEnd = HighResolutionTimer::now().getTick();
	unsigned __int64 tscFinish = __rdtsc();

	// frequency = tsc ticks * reference frequency / reference ticks
	uint64 tscElapsed = tscFinish - tscBegin;
	uint64 hrtFreq = 0;
	uint64 hrtElapsed = HighResolutionTimer::getTimeDiffEx(tickBegin, tickEnd, hrtFreq);
	return muldiv64(tscElapsed, hrtFreq, hrtElapsed);
}
|
||||
|
||||
int PPCTimer_initThread()
|
||||
{
|
||||
_rdtscFrequency = PPCTimer_estimateRDTSCFrequency();
|
||||
return 0;
|
||||
}
|
||||
|
||||
void PPCTimer_init()
|
||||
{
|
||||
std::thread t(PPCTimer_initThread);
|
||||
t.detach();
|
||||
_rdtscLastMeasure = __rdtsc();
|
||||
}
|
||||
|
||||
uint64 _tickSummary = 0;
|
||||
|
||||
void PPCTimer_start()
|
||||
{
|
||||
_rdtscLastMeasure = __rdtsc();
|
||||
_tickSummary = 0;
|
||||
}
|
||||
|
||||
// Returns the host's current time stamp counter value, unscaled.
uint64 PPCTimer_getRawTsc()
{
	const uint64 rawTsc = __rdtsc();
	return rawTsc;
}
|
||||
|
||||
// Converts a duration in microseconds into TSC ticks:
//   floor(us * _rdtscFrequency / 1000000)
//
// The product us * _rdtscFrequency does not fit into 64 bits for long
// durations (with a frequency of a few GHz, after roughly an hour and a half),
// so the computation goes through muldiv64, which never forms the full product.
// For inputs where the plain product fits, the result is identical.
uint64 PPCTimer_microsecondsToTsc(uint64 us)
{
	return muldiv64(us, _rdtscFrequency, 1000000ULL);
}
|
||||
|
||||
// Converts a TSC tick count into microseconds: (ticks * 1000000) / _rdtscFrequency,
// evaluated with a 128-bit intermediate product.
// Note that the parameter is named `us` but is used as the tick count.
// _rdtscFrequency must already be non-zero (see PPCTimer_waitForInit()).
uint64 PPCTimer_tscToMicroseconds(uint64 us)
{
	// 128-bit product ticks * 1e6
	uint128_t scaled{};
#if BOOST_OS_WINDOWS > 0
	scaled.low = _umul128(us, 1000000ULL, &scaled.high);
#else
	scaled.low = _umul128(us, 1000000ULL, (unsigned long long*)&scaled.high);
#endif

	// 128 / 64 bit division; the remainder is not needed
	uint64 unusedRemainder;
#if _MSC_VER < 1923
	return udiv128(scaled.low, scaled.high, _rdtscFrequency, &unusedRemainder);
#else
	return _udiv128(scaled.high, scaled.low, _rdtscFrequency, &unusedRemainder);
#endif
}
|
||||
|
||||
bool PPCTimer_isReady()
|
||||
{
|
||||
return _rdtscFrequency != 0;
|
||||
}
|
||||
|
||||
void PPCTimer_waitForInit()
|
||||
{
|
||||
while (!PPCTimer_isReady()) std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||
}
|
||||
|
||||
FSpinlock sTimerSpinlock;
|
||||
|
||||
// thread safe
|
||||
uint64 PPCTimer_getFromRDTSC()
|
||||
{
|
||||
sTimerSpinlock.acquire();
|
||||
_mm_mfence();
|
||||
uint64 rdtscCurrentMeasure = __rdtsc();
|
||||
uint64 rdtscDif = rdtscCurrentMeasure - _rdtscLastMeasure;
|
||||
// optimized max(rdtscDif, 0) without conditionals
|
||||
rdtscDif = rdtscDif & ~(uint64)((sint64)rdtscDif >> 63);
|
||||
|
||||
uint128_t diff{};
|
||||
#if BOOST_OS_WINDOWS > 0
|
||||
diff.low = _umul128(rdtscDif, Espresso::CORE_CLOCK, &diff.high);
|
||||
#else
|
||||
diff.low = _umul128(rdtscDif, Espresso::CORE_CLOCK, (unsigned long long*)&diff.high);
|
||||
#endif
|
||||
|
||||
if(rdtscCurrentMeasure > _rdtscLastMeasure)
|
||||
_rdtscLastMeasure = rdtscCurrentMeasure; // only travel forward in time
|
||||
|
||||
uint8 c = 0;
|
||||
#if BOOST_OS_WINDOWS > 0
|
||||
c = _addcarry_u64(c, _rdtscAcc.low, diff.low, &_rdtscAcc.low);
|
||||
_addcarry_u64(c, _rdtscAcc.high, diff.high, &_rdtscAcc.high);
|
||||
#else
|
||||
// requires casting because of long / long long nonesense
|
||||
c = _addcarry_u64(c, _rdtscAcc.low, diff.low, (unsigned long long*)&_rdtscAcc.low);
|
||||
_addcarry_u64(c, _rdtscAcc.high, diff.high, (unsigned long long*)&_rdtscAcc.high);
|
||||
#endif
|
||||
|
||||
uint64 remainder;
|
||||
#if _MSC_VER < 1923
|
||||
uint64 elapsedTick = udiv128(_rdtscAcc.low, _rdtscAcc.high, _rdtscFrequency, &remainder);
|
||||
#else
|
||||
uint64 elapsedTick = _udiv128(_rdtscAcc.high, _rdtscAcc.low, _rdtscFrequency, &remainder);
|
||||
#endif
|
||||
|
||||
_rdtscAcc.low = remainder;
|
||||
_rdtscAcc.high = 0;
|
||||
|
||||
// timer scaling
|
||||
elapsedTick <<= 3ull; // *8
|
||||
uint8 timerShiftFactor = ActiveSettings::GetTimerShiftFactor();
|
||||
elapsedTick >>= timerShiftFactor;
|
||||
|
||||
_tickSummary += elapsedTick;
|
||||
|
||||
sTimerSpinlock.release();
|
||||
return _tickSummary;
|
||||
}
|
293
src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h
Normal file
293
src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h
Normal file
|
@ -0,0 +1,293 @@
|
|||
#pragma once
|
||||
#include "Cafe/HW/Espresso/EspressoISA.h"
|
||||
#include "Cafe/HW/MMU/MMU.h"
|
||||
|
||||
bool GamePatch_IsNonReturnFunction(uint32 hleIndex);
|
||||
|
||||
// utility class to determine shape of a function
|
||||
class PPCFunctionBoundaryTracker
|
||||
{
|
||||
public:
|
||||
struct PPCRange_t
|
||||
{
|
||||
PPCRange_t() {};
|
||||
PPCRange_t(uint32 _startAddress) : startAddress(_startAddress) {};
|
||||
|
||||
uint32 startAddress{};
|
||||
uint32 length{};
|
||||
//bool isProcessed{false};
|
||||
|
||||
uint32 getEndAddress() const { return startAddress + length; };
|
||||
};
|
||||
|
||||
public:
|
||||
void trackStartPoint(MPTR startAddress)
|
||||
{
|
||||
processRange(startAddress, nullptr, nullptr);
|
||||
processBranchTargets();
|
||||
}
|
||||
|
||||
bool getRangeForAddress(uint32 address, PPCRange_t& range)
|
||||
{
|
||||
for (auto itr : map_ranges)
|
||||
{
|
||||
if (address >= itr->startAddress && address < (itr->startAddress + itr->length))
|
||||
{
|
||||
range = *itr;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
void addBranchDestination(PPCRange_t* sourceRange, MPTR address)
|
||||
{
|
||||
map_branchTargets.emplace(address);
|
||||
}
|
||||
|
||||
// process flow of instruction
|
||||
// returns false if the IP cannot increment past the current instruction
|
||||
bool processInstruction(PPCRange_t* range, MPTR address)
|
||||
{
|
||||
// parse instructions
|
||||
uint32 opcode = memory_readU32(address);
|
||||
switch (Espresso::GetPrimaryOpcode(opcode))
|
||||
{
|
||||
case Espresso::PrimaryOpcode::ZERO:
|
||||
{
|
||||
if (opcode == 0)
|
||||
return false; // invalid instruction
|
||||
break;
|
||||
}
|
||||
case Espresso::PrimaryOpcode::VIRTUAL_HLE:
|
||||
{
|
||||
// end of function
|
||||
// is there a jump to a instruction after this one?
|
||||
uint32 hleFuncId = opcode & 0xFFFF;
|
||||
if (hleFuncId >= 0x1000 && hleFuncId < 0x4000)
|
||||
{
|
||||
if (GamePatch_IsNonReturnFunction(hleFuncId - 0x1000) == false)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
case Espresso::PrimaryOpcode::BC:
|
||||
{
|
||||
uint32 BD, BI;
|
||||
Espresso::BOField BO;
|
||||
bool AA, LK;
|
||||
Espresso::decodeOp_BC(opcode, BD, BO, BI, AA, LK);
|
||||
uint32 branchTarget = AA ? BD : BD + address;
|
||||
if (!LK)
|
||||
addBranchDestination(range, branchTarget);
|
||||
break;
|
||||
}
|
||||
case Espresso::PrimaryOpcode::B:
|
||||
{
|
||||
uint32 LI;
|
||||
bool AA, LK;
|
||||
Espresso::decodeOp_B(opcode, LI, AA, LK);
|
||||
uint32 branchTarget = AA ? LI : LI + address;
|
||||
if (!LK)
|
||||
{
|
||||
addBranchDestination(range, branchTarget);
|
||||
// if the next two or previous two instructions are branch instructions, we assume that they are destinations of a jump table
|
||||
// todo - can we make this more reliable by checking for BCTR or similar instructions first?
|
||||
// example: The Swapper 0x01B1FC04
|
||||
if (PPCRecompilerCalcFuncSize_isUnconditionalBranchInstruction(memory_readU32(address + 4)) && PPCRecompilerCalcFuncSize_isUnconditionalBranchInstruction(memory_readU32(address + 8)) ||
|
||||
PPCRecompilerCalcFuncSize_isUnconditionalBranchInstruction(memory_readU32(address - 8)) && PPCRecompilerCalcFuncSize_isUnconditionalBranchInstruction(memory_readU32(address - 4)))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return false; // current flow ends at unconditional branch instruction
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Espresso::PrimaryOpcode::GROUP_19:
|
||||
switch (Espresso::GetGroup19Opcode(opcode))
|
||||
{
|
||||
case Espresso::Opcode19::BCLR:
|
||||
{
|
||||
Espresso::BOField BO;
|
||||
uint32 BI;
|
||||
bool LK;
|
||||
Espresso::decodeOp_BCLR(opcode, BO, BI, LK);
|
||||
if (BO.branchAlways() && !LK)
|
||||
{
|
||||
// unconditional BLR
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Espresso::Opcode19::BCCTR:
|
||||
if (opcode == 0x4E800420)
|
||||
{
|
||||
// unconditional BCTR
|
||||
// this instruction is often used for switch statements, therefore we should be wary of ending the function here
|
||||
// It's better to overestimate function size than to predict sizes that are too short
|
||||
|
||||
// Currently we only end the function if the BCTR is followed by a NOP (alignment) or invalid instruction
|
||||
// todo: improve robustness, find better ways to handle false positives
|
||||
uint32 nextOpcode = memory_readU32(address + 4);
|
||||
|
||||
if (nextOpcode == 0x60000000 || PPCRecompilerCalcFuncSize_isValidInstruction(nextOpcode) == false)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// conditional BCTR
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void checkForCollisions()
|
||||
{
|
||||
#ifndef PUBLIC_RELEASE
|
||||
uint32 endOfPrevious = 0;
|
||||
for (auto itr : map_ranges)
|
||||
{
|
||||
if (endOfPrevious > itr->startAddress)
|
||||
{
|
||||
cemu_assert_debug(false);
|
||||
}
|
||||
endOfPrevious = itr->startAddress + itr->length;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// nextRange must point to the closest range after startAddress, or NULL if there is none
|
||||
void processRange(MPTR startAddress, PPCRange_t* previousRange, PPCRange_t* nextRange)
|
||||
{
|
||||
checkForCollisions();
|
||||
cemu_assert_debug(previousRange == nullptr || (startAddress == (previousRange->startAddress + previousRange->length)));
|
||||
PPCRange_t* newRange;
|
||||
if (previousRange && (previousRange->startAddress + previousRange->length) == startAddress)
|
||||
{
|
||||
newRange = previousRange;
|
||||
}
|
||||
else
|
||||
{
|
||||
cemu_assert_debug(previousRange == nullptr);
|
||||
newRange = new PPCRange_t(startAddress);
|
||||
map_ranges.emplace(newRange);
|
||||
}
|
||||
// process instruction flow until it is interrupted by a non-conditional branch
|
||||
MPTR currentAddress = startAddress;
|
||||
MPTR endAddress = 0xFFFFFFFF;
|
||||
if (nextRange)
|
||||
endAddress = nextRange->startAddress;
|
||||
while (currentAddress < endAddress)
|
||||
{
|
||||
if (!processInstruction(newRange, currentAddress))
|
||||
{
|
||||
currentAddress += 4;
|
||||
break;
|
||||
}
|
||||
currentAddress += 4;
|
||||
}
|
||||
newRange->length = currentAddress - newRange->startAddress;
|
||||
|
||||
if (nextRange && currentAddress >= nextRange->startAddress)
|
||||
{
|
||||
// merge with next range
|
||||
newRange->length = (nextRange->startAddress + nextRange->length) - newRange->startAddress;
|
||||
map_ranges.erase(nextRange);
|
||||
delete nextRange;
|
||||
checkForCollisions();
|
||||
return;
|
||||
}
|
||||
checkForCollisions();
|
||||
}
|
||||
|
||||
// find first unvisited branch target and start a new range there
|
||||
// return true if method should be called again
|
||||
bool processBranchTargetsSinglePass()
|
||||
{
|
||||
cemu_assert_debug(!map_ranges.empty());
|
||||
auto rangeItr = map_ranges.begin();
|
||||
|
||||
PPCRange_t* previousRange = nullptr;
|
||||
for (std::set<uint32_t>::const_iterator targetItr = map_branchTargets.begin() ; targetItr != map_branchTargets.end(); )
|
||||
{
|
||||
while (rangeItr != map_ranges.end() && ((*rangeItr)->startAddress + (*rangeItr)->length) <= (*targetItr))
|
||||
{
|
||||
previousRange = *rangeItr;
|
||||
rangeItr++;
|
||||
if (rangeItr == map_ranges.end())
|
||||
{
|
||||
// last range reached
|
||||
if ((previousRange->startAddress + previousRange->length) == *targetItr)
|
||||
processRange(*targetItr, previousRange, nullptr);
|
||||
else
|
||||
processRange(*targetItr, nullptr, nullptr);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if ((*targetItr) >= (*rangeItr)->startAddress &&
|
||||
(*targetItr) < ((*rangeItr)->startAddress + (*rangeItr)->length))
|
||||
{
|
||||
// delete visited targets
|
||||
targetItr = map_branchTargets.erase(targetItr);
|
||||
continue;
|
||||
}
|
||||
|
||||
cemu_assert_debug((*rangeItr)->startAddress > (*targetItr));
|
||||
if (previousRange && (previousRange->startAddress + previousRange->length) == *targetItr)
|
||||
processRange(*targetItr, previousRange, *rangeItr); // extend previousRange
|
||||
else
|
||||
processRange(*targetItr, nullptr, *rangeItr);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void processBranchTargets()
|
||||
{
|
||||
while (processBranchTargetsSinglePass());
|
||||
}
|
||||
|
||||
private:
|
||||
bool PPCRecompilerCalcFuncSize_isUnconditionalBranchInstruction(uint32 opcode)
|
||||
{
|
||||
if (Espresso::GetPrimaryOpcode(opcode) == Espresso::PrimaryOpcode::B)
|
||||
{
|
||||
uint32 LI;
|
||||
bool AA, LK;
|
||||
Espresso::decodeOp_B(opcode, LI, AA, LK);
|
||||
if (!LK)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool PPCRecompilerCalcFuncSize_isValidInstruction(uint32 opcode)
|
||||
{
|
||||
if ((opcode >> 26) == 0)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
struct RangePtrCmp
|
||||
{
|
||||
bool operator()(const PPCRange_t* lhs, const PPCRange_t* rhs) const
|
||||
{
|
||||
return lhs->startAddress < rhs->startAddress;
|
||||
}
|
||||
};
|
||||
|
||||
std::set<PPCRange_t*, RangePtrCmp> map_ranges;
|
||||
std::set<uint32> map_branchTargets;
|
||||
};
|
593
src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp
Normal file
593
src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp
Normal file
|
@ -0,0 +1,593 @@
|
|||
#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
|
||||
#include "PPCFunctionBoundaryTracker.h"
|
||||
#include "PPCRecompiler.h"
|
||||
#include "PPCRecompilerIml.h"
|
||||
#include "PPCRecompilerX64.h"
|
||||
#include "Cafe/OS/RPL/rpl.h"
|
||||
#include "util/containers/RangeStore.h"
|
||||
#include "Cafe/OS/libs/coreinit/coreinit_CodeGen.h"
|
||||
#include "config/ActiveSettings.h"
|
||||
#include "config/LaunchSettings.h"
|
||||
|
||||
#include "util/helpers/fspinlock.h"
|
||||
#include "Common/ExceptionHandler/ExceptionHandler.h"
|
||||
#include "util/helpers/helpers.h"
|
||||
|
||||
#include "util/MemMapper/MemMapper.h"
|
||||
|
||||
struct PPCInvalidationRange
|
||||
{
|
||||
MPTR startAddress;
|
||||
uint32 size;
|
||||
|
||||
PPCInvalidationRange(MPTR _startAddress, uint32 _size) : startAddress(_startAddress), size(_size) {};
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
FSpinlock recompilerSpinlock;
|
||||
std::queue<MPTR> targetQueue;
|
||||
std::vector<PPCInvalidationRange> invalidationRanges;
|
||||
}PPCRecompilerState;
|
||||
|
||||
RangeStore<PPCRecFunction_t*, uint32, 7703, 0x2000> rangeStore_ppcRanges;
|
||||
|
||||
void ATTR_MS_ABI (*PPCRecompiler_enterRecompilerCode)(uint64 codeMem, uint64 ppcInterpreterInstance);
|
||||
void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_visited)();
|
||||
void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_unvisited)();
|
||||
|
||||
PPCRecompilerInstanceData_t* ppcRecompilerInstanceData;
|
||||
|
||||
bool ppcRecompilerEnabled = false;
|
||||
|
||||
// this function does never block and can fail if the recompiler lock cannot be acquired immediately
|
||||
void PPCRecompiler_visitAddressNoBlock(uint32 enterAddress)
|
||||
{
|
||||
// quick read-only check without lock
|
||||
if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] != PPCRecompiler_leaveRecompilerCode_unvisited)
|
||||
return;
|
||||
// try to acquire lock
|
||||
if (!PPCRecompilerState.recompilerSpinlock.tryAcquire())
|
||||
return;
|
||||
auto funcPtr = ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4];
|
||||
if (funcPtr != PPCRecompiler_leaveRecompilerCode_unvisited)
|
||||
{
|
||||
// was visited since previous check
|
||||
PPCRecompilerState.recompilerSpinlock.release();
|
||||
return;
|
||||
}
|
||||
// add to recompilation queue and flag as visited
|
||||
PPCRecompilerState.targetQueue.emplace(enterAddress);
|
||||
ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] = PPCRecompiler_leaveRecompilerCode_visited;
|
||||
|
||||
PPCRecompilerState.recompilerSpinlock.release();
|
||||
}
|
||||
|
||||
// Forwards to PPCRecompiler_visitAddressNoBlock() when the recompiler is enabled.
void PPCRecompiler_recompileIfUnvisited(uint32 enterAddress)
{
	if (ppcRecompilerEnabled)
		PPCRecompiler_visitAddressNoBlock(enterAddress);
}
|
||||
|
||||
// Executes recompiled code starting at funcPtr for the given CPU context.
// On Windows the floating point rounding mode is switched to round-to-nearest for the
// duration of the call and restored afterwards.
void PPCRecompiler_enter(PPCInterpreter_t* hCPU, PPCREC_JUMP_ENTRY funcPtr)
{
#if BOOST_OS_WINDOWS > 0
	const uint32 savedFpControl = _controlfp(0, 0);
	_controlfp(_RC_NEAR, _MCW_RC);
#endif
	PPCRecompiler_enterRecompilerCode((uint64)funcPtr, (uint64)hCPU);
#if BOOST_OS_WINDOWS > 0
	_controlfp(savedFpControl, _MCW_RC);
	// debug recompiler exit - useful to find frequently executed functions which couldn't be recompiled
#ifndef PUBLIC_RELEASE
	if (hCPU->remainingCycles > 0 && GetAsyncKeyState(VK_F4))
	{
		const auto now = std::chrono::high_resolution_clock::now();
		auto dur = std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count();
		forceLog_printf("Recompiler exit: 0x%08x LR: 0x%08x Timestamp %lld.%04lld", hCPU->instructionPointer, hCPU->spr.LR, dur / 1000LL, (dur % 1000LL));
	}
#endif
#endif
	// after leaving recompiler prematurely attempt to recompile the code at the new location
	if (hCPU->remainingCycles > 0)
		PPCRecompiler_visitAddressNoBlock(hCPU->instructionPointer);
}
|
||||
|
||||
// Enters recompiled code at enterAddress if it is already available.
// Unlike PPCRecompiler_attemptEnter() this never queues the address for recompilation.
void PPCRecompiler_attemptEnterWithoutRecompile(PPCInterpreter_t* hCPU, uint32 enterAddress)
{
	cemu_assert_debug(hCPU->instructionPointer == enterAddress);
	if (!ppcRecompilerEnabled)
		return;
	auto jumpEntry = ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4];
	// the two placeholder entries mean that no recompiled code exists for this address
	if (jumpEntry == PPCRecompiler_leaveRecompilerCode_unvisited)
		return;
	if (jumpEntry == PPCRecompiler_leaveRecompilerCode_visited)
		return;
	cemu_assert_debug(ppcRecompilerInstanceData != nullptr);
	PPCRecompiler_enter(hCPU, jumpEntry);
}
|
||||
|
||||
// Enters recompiled code at enterAddress if available. An address that is still unvisited
// gets queued for recompilation instead. Does nothing when the recompiler is disabled or
// no cycles remain.
void PPCRecompiler_attemptEnter(PPCInterpreter_t* hCPU, uint32 enterAddress)
{
	cemu_assert_debug(hCPU->instructionPointer == enterAddress);
	if (!ppcRecompilerEnabled || hCPU->remainingCycles <= 0)
		return;
	auto jumpEntry = ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4];
	if (jumpEntry == PPCRecompiler_leaveRecompilerCode_unvisited)
	{
		PPCRecompiler_visitAddressNoBlock(enterAddress);
		return;
	}
	if (jumpEntry == PPCRecompiler_leaveRecompilerCode_visited)
		return; // queued, but not recompiled yet
	// enter
	cemu_assert_debug(ppcRecompilerInstanceData != nullptr);
	PPCRecompiler_enter(hCPU, jumpEntry);
}
|
||||
|
||||
// Recompiles the PPC code in range.
//
// range          - address range of the function
// entryAddresses - passed through to the intermediate code generator
// entryPointsOut - on success, receives one (PPC address, x64 code offset) pair per enterable segment
//
// Returns the newly allocated function object, or nullptr if the range lies outside of the
// recompilable code area, is inside a codegen area that should be avoided, or if code
// generation failed. On failure no function object is left allocated.
PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set<uint32>& entryAddresses, std::vector<std::pair<MPTR, uint32>>& entryPointsOut)
{
	if (range.startAddress >= PPC_REC_CODE_AREA_END)
	{
		cemuLog_force("Attempting to recompile function outside of allowed code area");
		return nullptr;
	}

	uint32 codeGenRangeStart;
	uint32 codeGenRangeSize = 0;
	coreinit::OSGetCodegenVirtAddrRangeInternal(codeGenRangeStart, codeGenRangeSize);
	if (codeGenRangeSize != 0)
	{
		if (range.startAddress >= codeGenRangeStart && range.startAddress < (codeGenRangeStart + codeGenRangeSize))
		{
			if (coreinit::codeGenShouldAvoid())
			{
				return nullptr;
			}
		}
	}

	PPCRecFunction_t* ppcRecFunc = new PPCRecFunction_t();
	ppcRecFunc->ppcAddress = range.startAddress;
	ppcRecFunc->ppcSize = range.length;
	// generate intermediate code
	ppcImlGenContext_t ppcImlGenContext = { 0 };
	bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses);
	if (compiledSuccessfully == false)
	{
		// todo: Free everything
		PPCRecompiler_freeContext(&ppcImlGenContext);
		delete ppcRecFunc;
		return nullptr;
	}
	// emit x64 code
	bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext);
	if (x64GenerationSuccess == false)
	{
		PPCRecompiler_freeContext(&ppcImlGenContext);
		// release the function object here as well, otherwise it leaks on every failed x64 generation
		delete ppcRecFunc;
		return nullptr;
	}

	// collect list of PPC-->x64 entry points
	entryPointsOut.clear();
	for (sint32 s = 0; s < ppcImlGenContext.segmentListCount; s++)
	{
		PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s];
		if (imlSegment->isEnterable == false)
			continue;

		uint32 ppcEnterOffset = imlSegment->enterPPCAddress;
		uint32 x64Offset = imlSegment->x64Offset;

		entryPointsOut.emplace_back(ppcEnterOffset, x64Offset);
	}

	PPCRecompiler_freeContext(&ppcImlGenContext);
	return ppcRecFunc;
}
|
||||
|
||||
// Publishes a freshly recompiled function: writes its entry points into the direct jump
// table and registers its ranges in rangeStore_ppcRanges.
// Returns false (without publishing) if the initial entry point is no longer flagged as
// visited or if any range of the function overlaps a pending invalidation range.
bool PPCRecompiler_makeRecompiledFunctionActive(uint32 initialEntryPoint, PPCFunctionBoundaryTracker::PPCRange_t& range, PPCRecFunction_t* ppcRecFunc, std::vector<std::pair<MPTR, uint32>>& entryPoints)
{
	PPCRecompilerState.recompilerSpinlock.acquire();

	// the initial entrypoint must still be flagged for recompilation
	// it is possible that the range has been invalidated during the time it took to translate the function
	if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[initialEntryPoint / 4] != PPCRecompiler_leaveRecompilerCode_visited)
	{
		PPCRecompilerState.recompilerSpinlock.release();
		return false;
	}

	// did any part of the function get invalidated while it was being recompiled?
	bool wasInvalidated = false;
	for (auto& pendingInvalidation : PPCRecompilerState.invalidationRanges)
	{
		const MPTR invalidatedStart = pendingInvalidation.startAddress;
		const MPTR invalidatedEnd = invalidatedStart + pendingInvalidation.size;
		for (auto& funcRange : ppcRecFunc->list_ranges)
		{
			if (funcRange.ppcAddress < invalidatedEnd && (funcRange.ppcAddress + funcRange.ppcSize) >= invalidatedStart)
			{
				wasInvalidated = true;
				break;
			}
		}
	}
	// the pending invalidations are consumed regardless of the outcome
	PPCRecompilerState.invalidationRanges.clear();
	if (wasInvalidated)
	{
		PPCRecompilerState.recompilerSpinlock.release();
		return false;
	}

	// update jump table
	for (auto& entryPoint : entryPoints)
	{
		const MPTR ppcEntryAddress = entryPoint.first;
		const uint32 x64CodeOffset = entryPoint.second;
		ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[ppcEntryAddress / 4] = (PPCREC_JUMP_ENTRY)((uint8*)ppcRecFunc->x86Code + x64CodeOffset);
	}

	// due to inlining, some entrypoints can get optimized away
	// therefore we reset all addresses that are still marked as visited (but not recompiled)
	// we dont remove the points from the queue but any address thats not marked as visited won't get recompiled
	// if they are reachable, the interpreter will queue them again
	const uint32 lastAddress = range.startAddress + range.length;
	for (uint32 ppcAddr = range.startAddress; ppcAddr <= lastAddress; ppcAddr += 4)
	{
		if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[ppcAddr / 4] == PPCRecompiler_leaveRecompilerCode_visited)
			ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[ppcAddr / 4] = PPCRecompiler_leaveRecompilerCode_unvisited;
	}

	// register ranges
	for (auto& funcRange : ppcRecFunc->list_ranges)
		funcRange.storedRange = rangeStore_ppcRanges.storeRange(ppcRecFunc, funcRange.ppcAddress, funcRange.ppcAddress + funcRange.ppcSize);

	PPCRecompilerState.recompilerSpinlock.release();
	return true;
}
|
||||
|
||||
// Determines the boundaries of the function containing address, recompiles it and,
// if recompilation succeeded, attempts to make the result active.
void PPCRecompiler_recompileAtAddress(uint32 address)
{
	cemu_assert_debug(ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[address / 4] == PPCRecompiler_leaveRecompilerCode_visited);

	// determine the extent of the function
	PPCFunctionBoundaryTracker boundaryTracker;
	boundaryTracker.trackStartPoint(address);
	// pick the range that encompasses address
	PPCFunctionBoundaryTracker::PPCRange_t enclosingRange;
	const bool rangeFound = boundaryTracker.getRangeForAddress(address, enclosingRange);
	if (rangeFound == false)
	{
		cemu_assert_debug(false);
	}

	// todo - use info from previously compiled ranges to determine full size of this function (and merge all the entryAddresses)

	// collect all currently known entry points for this range
	PPCRecompilerState.recompilerSpinlock.acquire();
	std::set<uint32> knownEntryAddresses;
	knownEntryAddresses.emplace(address);
	PPCRecompilerState.recompilerSpinlock.release();

	std::vector<std::pair<MPTR, uint32>> x64EntryPoints;
	auto recompiledFunc = PPCRecompiler_recompileFunction(enclosingRange, knownEntryAddresses, x64EntryPoints);
	if (!recompiledFunc)
		return; // recompilation failed

	// the result of the activation is currently not evaluated
	PPCRecompiler_makeRecompiledFunctionActive(address, enclosingRange, recompiledFunc, x64EntryPoints);
}
|
||||
|
||||
void PPCRecompiler_thread()
|
||||
{
|
||||
SetThreadName("PPCRecompiler_thread");
|
||||
while (true)
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||
// asynchronous recompilation:
|
||||
// 1) take address from queue
|
||||
// 2) check if address is still marked as visited
|
||||
// 3) if yes -> calculate size, gather all entry points, recompile and update jump table
|
||||
while (true)
|
||||
{
|
||||
PPCRecompilerState.recompilerSpinlock.acquire();
|
||||
if (PPCRecompilerState.targetQueue.empty())
|
||||
{
|
||||
PPCRecompilerState.recompilerSpinlock.release();
|
||||
break;
|
||||
}
|
||||
auto enterAddress = PPCRecompilerState.targetQueue.front();
|
||||
PPCRecompilerState.targetQueue.pop();
|
||||
|
||||
auto funcPtr = ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4];
|
||||
if (funcPtr != PPCRecompiler_leaveRecompilerCode_visited)
|
||||
{
|
||||
// only recompile functions if marked as visited
|
||||
PPCRecompilerState.recompilerSpinlock.release();
|
||||
continue;
|
||||
}
|
||||
PPCRecompilerState.recompilerSpinlock.release();
|
||||
|
||||
PPCRecompiler_recompileAtAddress(enterAddress);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define PPC_REC_ALLOC_BLOCK_SIZE (4*1024*1024) // 4MB
|
||||
|
||||
std::bitset<(MEMORY_CODEAREA_ADDR + MEMORY_CODEAREA_SIZE) / PPC_REC_ALLOC_BLOCK_SIZE> ppcRecompiler_reservedBlockMask;
|
||||
|
||||
// Allocates the function table and direct jump table entries for the
// PPC_REC_ALLOC_BLOCK_SIZE-sized block which contains offset, and initializes the jump
// table entries of that block to 'unvisited'. Blocks which are already reserved are skipped.
void PPCRecompiler_reserveLookupTableBlock(uint32 offset)
{
	const uint32 blockIndex = offset / PPC_REC_ALLOC_BLOCK_SIZE;
	// round down to the start of the block
	offset = blockIndex * PPC_REC_ALLOC_BLOCK_SIZE;

	if (ppcRecompiler_reservedBlockMask[blockIndex])
		return;
	ppcRecompiler_reservedBlockMask[blockIndex] = true;

	const uint32 firstEntry = offset / 4;
	void* funcTableMem = MemMapper::AllocateMemory(&(ppcRecompilerInstanceData->ppcRecompilerFuncTable[firstEntry]), (PPC_REC_ALLOC_BLOCK_SIZE/4)*sizeof(void*), MemMapper::PAGE_PERMISSION::P_RW, true);
	void* jumpTableMem = MemMapper::AllocateMemory(&(ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[firstEntry]), (PPC_REC_ALLOC_BLOCK_SIZE/4)*sizeof(void*), MemMapper::PAGE_PERMISSION::P_RW, true);
	if (funcTableMem == nullptr || jumpTableMem == nullptr)
	{
		forceLog_printf("Failed to allocate memory for recompiler (0x%08x)", offset);
		cemu_assert(false);
		return;
	}
	for (uint32 entry = 0; entry < PPC_REC_ALLOC_BLOCK_SIZE/4; entry++)
		ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[firstEntry + entry] = PPCRecompiler_leaveRecompilerCode_unvisited;
}
|
||||
|
||||
// Reserves lookup table blocks for every PPC_REC_ALLOC_BLOCK_SIZE-sized block touched by
// [startAddress, startAddress + size), limited to the end of the code area.
// Does nothing if the recompiler instance data has not been created.
void PPCRecompiler_allocateRange(uint32 startAddress, uint32 size)
{
	if (ppcRecompilerInstanceData == nullptr)
		return;
	const uint32 codeAreaEnd = (uint32)MEMORY_CODEAREA_ADDR + MEMORY_CODEAREA_SIZE;
	// end is rounded up, start is rounded down to block granularity
	uint32 alignedEnd = (startAddress + size + PPC_REC_ALLOC_BLOCK_SIZE - 1) & ~(PPC_REC_ALLOC_BLOCK_SIZE-1);
	uint32 alignedStart = (startAddress) & ~(PPC_REC_ALLOC_BLOCK_SIZE-1);
	alignedStart = std::min(alignedStart, codeAreaEnd);
	alignedEnd = std::min(alignedEnd, codeAreaEnd);
	for (uint32 blockAddress = alignedStart; blockAddress < alignedEnd; blockAddress += PPC_REC_ALLOC_BLOCK_SIZE)
		PPCRecompiler_reserveLookupTableBlock(blockAddress);
}
|
||||
|
||||
struct ppcRecompilerFuncRange_t
|
||||
{
|
||||
MPTR ppcStart;
|
||||
uint32 ppcSize;
|
||||
void* x86Start;
|
||||
size_t x86Size;
|
||||
};
|
||||
|
||||
// Looks up all stored ranges overlapping [addr, addr + 4).
// rangesOut   - receives up to *countInOut entries
// countInOut  - in: capacity of rangesOut, out: total number of matching ranges
// Returns false if more ranges matched than rangesOut could hold.
DLLEXPORT bool PPCRecompiler_findFuncRanges(uint32 addr, ppcRecompilerFuncRange_t* rangesOut, size_t* countInOut)
{
	PPCRecompilerState.recompilerSpinlock.acquire();
	const size_t capacity = *countInOut;
	size_t matchCount = 0;

	rangeStore_ppcRanges.findRanges(addr, addr + 4, [rangesOut, capacity, &matchCount](uint32 start, uint32 end, PPCRecFunction_t* func)
		{
			// matches beyond the capacity are only counted
			if (matchCount < capacity)
			{
				ppcRecompilerFuncRange_t& outEntry = rangesOut[matchCount];
				outEntry.ppcStart = start;
				outEntry.ppcSize = (end-start);
				outEntry.x86Start = func->x86Code;
				outEntry.x86Size = func->x86Size;
			}
			matchCount++;
		}
	);
	PPCRecompilerState.recompilerSpinlock.release();
	*countInOut = matchCount;
	return !(matchCount > capacity);
}
|
||||
|
||||
// Returns the base of the direct jump table, or nullptr if the recompiler instance data
// has not been created.
DLLEXPORT uintptr_t* PPCRecompiler_getJumpTableBase()
{
	if (ppcRecompilerInstanceData != nullptr)
		return (uintptr_t*)ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable;
	return nullptr;
}
|
||||
|
||||
// Resets the lookup table entries covering size bytes starting at offset: the function
// table entries are cleared and the jump table entries are set back to 'unvisited'.
// Does nothing if the recompiler instance data has not been created.
void PPCRecompiler_invalidateTableRange(uint32 offset, uint32 size)
{
	if (ppcRecompilerInstanceData == nullptr)
		return;
	const uint32 entryCount = size / 4;
	for (uint32 entry = 0; entry < entryCount; entry++)
	{
		const uint32 tableIndex = offset / 4 + entry;
		ppcRecompilerInstanceData->ppcRecompilerFuncTable[tableIndex] = nullptr;
		ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[tableIndex] = PPCRecompiler_leaveRecompilerCode_unvisited;
	}
}
|
||||
|
||||
// Detaches a recompiled function: resets the lookup table entries of all its ranges and
// removes the ranges from rangeStore_ppcRanges.
// The caller must hold PPCRecompilerState.recompilerSpinlock.
void PPCRecompiler_deleteFunction(PPCRecFunction_t* func)
{
	cemu_assert_debug(PPCRecompilerState.recompilerSpinlock.isHolding());
	for (auto& funcRange : func->list_ranges)
	{
		PPCRecompiler_invalidateTableRange(funcRange.ppcAddress, funcRange.ppcSize);
		if (funcRange.storedRange)
		{
			rangeStore_ppcRanges.deleteRange(funcRange.storedRange);
		}
		funcRange.storedRange = nullptr;
	}
	// todo - free x86 code
}
|
||||
|
||||
// Invalidates recompiled code in [startAddr, endAddr): the jump table entries are reset to
// 'unvisited', the range is recorded in the invalidation list and every stored function
// overlapping the range is removed.
// Does nothing if the recompiler is disabled or startAddr >= PPC_REC_CODE_AREA_SIZE.
DLLEXPORT void PPCRecompiler_invalidateRange(uint32 startAddr, uint32 endAddr)
{
	if (!ppcRecompilerEnabled)
		return;
	if (startAddr >= PPC_REC_CODE_AREA_SIZE)
		return;
	cemu_assert_debug(endAddr >= startAddr);

	PPCRecompilerState.recompilerSpinlock.acquire();

	// mark range as unvisited
	// 64-bit loop counter, presumably so the increment cannot wrap around for end addresses close to 2^32
	const uint64 alignedEnd = (uint64)(endAddr&~3);
	for (uint64 ppcAddr = (uint64)startAddr&~3; ppcAddr < alignedEnd; ppcAddr += 4)
		ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[ppcAddr / 4] = PPCRecompiler_leaveRecompilerCode_unvisited;

	// add entry to invalidation queue
	PPCRecompilerState.invalidationRanges.emplace_back(startAddr, endAddr-startAddr);

	// remove stored functions until no overlapping range is left
	uint32 foundStart;
	uint32 foundEnd;
	PPCRecFunction_t* foundFunc;
	while (rangeStore_ppcRanges.findFirstRange(startAddr, endAddr, foundStart, foundEnd, foundFunc))
		PPCRecompiler_deleteFunction(foundFunc);

	PPCRecompilerState.recompilerSpinlock.release();
}
|
||||
|
||||
void PPCRecompiler_init()
|
||||
{
|
||||
if (ActiveSettings::GetCPUMode() == CPUMode::SinglecoreInterpreter)
|
||||
{
|
||||
ppcRecompilerEnabled = false;
|
||||
return;
|
||||
}
|
||||
if (LaunchSettings::ForceInterpreter())
|
||||
{
|
||||
cemuLog_log(LogType::Force, "Recompiler disabled. Command line --force-interpreter was passed");
|
||||
return;
|
||||
}
|
||||
if (ppcRecompilerInstanceData)
|
||||
{
|
||||
MemMapper::FreeReservation(ppcRecompilerInstanceData, sizeof(PPCRecompilerInstanceData_t));
|
||||
ppcRecompilerInstanceData = nullptr;
|
||||
}
|
||||
debug_printf("Allocating %dMB for recompiler instance data...\n", (sint32)(sizeof(PPCRecompilerInstanceData_t) / 1024 / 1024));
|
||||
ppcRecompilerInstanceData = (PPCRecompilerInstanceData_t*)MemMapper::ReserveMemory(nullptr, sizeof(PPCRecompilerInstanceData_t), MemMapper::PAGE_PERMISSION::P_RW);
|
||||
MemMapper::AllocateMemory(&(ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom), sizeof(PPCRecompilerInstanceData_t) - offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom), MemMapper::PAGE_PERMISSION::P_RW, true);
|
||||
PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions();
|
||||
|
||||
uint32 codeRegionEnd = RPLLoader_GetMaxCodeOffset();
|
||||
codeRegionEnd = (codeRegionEnd + PPC_REC_ALLOC_BLOCK_SIZE - 1) & ~(PPC_REC_ALLOC_BLOCK_SIZE - 1);
|
||||
|
||||
uint32 codeRegionSize = codeRegionEnd - PPC_REC_CODE_AREA_START;
|
||||
forceLogDebug_printf("Allocating recompiler tables for range 0x%08x-0x%08x", PPC_REC_CODE_AREA_START, codeRegionEnd);
|
||||
|
||||
for (uint32 i = 0; i < codeRegionSize; i += PPC_REC_ALLOC_BLOCK_SIZE)
|
||||
{
|
||||
PPCRecompiler_reserveLookupTableBlock(i);
|
||||
}
|
||||
|
||||
// init x64 recompiler instance data
|
||||
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[0] = 1ULL << 63ULL;
|
||||
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[1] = 0ULL;
|
||||
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[0] = 1ULL << 63ULL;
|
||||
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[1] = 1ULL << 63ULL;
|
||||
ppcRecompilerInstanceData->_x64XMM_xorNOTMask[0] = 0xFFFFFFFFFFFFFFFFULL;
|
||||
ppcRecompilerInstanceData->_x64XMM_xorNOTMask[1] = 0xFFFFFFFFFFFFFFFFULL;
|
||||
ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[0] = ~(1ULL << 63ULL);
|
||||
ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[1] = ~0ULL;
|
||||
ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[0] = ~(1ULL << 63ULL);
|
||||
ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[1] = ~(1ULL << 63ULL);
|
||||
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[0] = ~(1 << 31);
|
||||
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[1] = 0xFFFFFFFF;
|
||||
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[2] = 0xFFFFFFFF;
|
||||
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[3] = 0xFFFFFFFF;
|
||||
ppcRecompilerInstanceData->_x64XMM_singleWordMask[0] = 0xFFFFFFFFULL;
|
||||
ppcRecompilerInstanceData->_x64XMM_singleWordMask[1] = 0ULL;
|
||||
ppcRecompilerInstanceData->_x64XMM_constDouble1_1[0] = 1.0;
|
||||
ppcRecompilerInstanceData->_x64XMM_constDouble1_1[1] = 1.0;
|
||||
ppcRecompilerInstanceData->_x64XMM_constDouble0_0[0] = 0.0;
|
||||
ppcRecompilerInstanceData->_x64XMM_constDouble0_0[1] = 0.0;
|
||||
ppcRecompilerInstanceData->_x64XMM_constFloat0_0[0] = 0.0f;
|
||||
ppcRecompilerInstanceData->_x64XMM_constFloat0_0[1] = 0.0f;
|
||||
ppcRecompilerInstanceData->_x64XMM_constFloat1_1[0] = 1.0f;
|
||||
ppcRecompilerInstanceData->_x64XMM_constFloat1_1[1] = 1.0f;
|
||||
*(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[0] = 0x00800000;
|
||||
*(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[1] = 0x00800000;
|
||||
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[0] = 0x7F800000;
|
||||
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[1] = 0x7F800000;
|
||||
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[2] = 0x7F800000;
|
||||
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[3] = 0x7F800000;
|
||||
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[0] = ~0x80000000;
|
||||
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[1] = ~0x80000000;
|
||||
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[2] = ~0x80000000;
|
||||
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[3] = ~0x80000000;
|
||||
|
||||
// setup GQR scale tables
|
||||
|
||||
for (uint32 i = 0; i < 32; i++)
|
||||
{
|
||||
float a = 1.0f / (float)(1u << i);
|
||||
float b = 0;
|
||||
if (i == 0)
|
||||
b = 4294967296.0f;
|
||||
else
|
||||
b = (float)(1u << (32u - i));
|
||||
|
||||
float ar = (float)(1u << i);
|
||||
float br = 0;
|
||||
if (i == 0)
|
||||
br = 1.0f / 4294967296.0f;
|
||||
else
|
||||
br = 1.0f / (float)(1u << (32u - i));
|
||||
|
||||
ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[i * 2 + 0] = a;
|
||||
ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[i * 2 + 1] = 1.0f;
|
||||
ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[(i + 32) * 2 + 0] = b;
|
||||
ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[(i + 32) * 2 + 1] = 1.0f;
|
||||
|
||||
ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[i * 2 + 0] = a;
|
||||
ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[i * 2 + 1] = a;
|
||||
ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[(i + 32) * 2 + 0] = b;
|
||||
ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[(i + 32) * 2 + 1] = b;
|
||||
|
||||
ppcRecompilerInstanceData->_psq_st_scale_ps0_1[i * 2 + 0] = ar;
|
||||
ppcRecompilerInstanceData->_psq_st_scale_ps0_1[i * 2 + 1] = 1.0f;
|
||||
ppcRecompilerInstanceData->_psq_st_scale_ps0_1[(i + 32) * 2 + 0] = br;
|
||||
ppcRecompilerInstanceData->_psq_st_scale_ps0_1[(i + 32) * 2 + 1] = 1.0f;
|
||||
|
||||
ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[i * 2 + 0] = ar;
|
||||
ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[i * 2 + 1] = ar;
|
||||
ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[(i + 32) * 2 + 0] = br;
|
||||
ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[(i + 32) * 2 + 1] = br;
|
||||
}
|
||||
|
||||
// mxcsr
|
||||
ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOn = 0x1F80 | 0x8000;
|
||||
ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOff = 0x1F80;
|
||||
|
||||
// query processor extensions
|
||||
int cpuInfo[4];
|
||||
__cpuid(cpuInfo, 0x80000001);
|
||||
hasLZCNTSupport = ((cpuInfo[2] >> 5) & 1) != 0;
|
||||
__cpuid(cpuInfo, 0x1);
|
||||
hasMOVBESupport = ((cpuInfo[2] >> 22) & 1) != 0;
|
||||
hasAVXSupport = ((cpuInfo[2] >> 28) & 1) != 0;
|
||||
__cpuidex(cpuInfo, 0x7, 0);
|
||||
hasBMI2Support = ((cpuInfo[1] >> 8) & 1) != 0;
|
||||
|
||||
forceLog_printf("Recompiler initialized. CPU extensions: %s%s%s", hasLZCNTSupport ? "LZCNT " : "", hasMOVBESupport ? "MOVBE " : "", hasAVXSupport ? "AVX " : "");
|
||||
|
||||
ppcRecompilerEnabled = true;
|
||||
|
||||
// launch recompilation thread
|
||||
std::thread t_recompiler(PPCRecompiler_thread);
|
||||
t_recompiler.detach();
|
||||
}
|
399
src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h
Normal file
399
src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h
Normal file
|
@ -0,0 +1,399 @@
|
|||
#include <vector>
|
||||
|
||||
// PPC address range that may contain executable code
#define PPC_REC_CODE_AREA_START		(0x00000000) // lower bound of executable memory area. Recompiler expects this address to be 0
#define PPC_REC_CODE_AREA_END		(0x10000000) // upper bound of executable memory area
#define PPC_REC_CODE_AREA_SIZE		(PPC_REC_CODE_AREA_END - PPC_REC_CODE_AREA_START)

// rounds v up to the next multiple of 4MB (0x400000)
#define PPC_REC_ALIGN_TO_4MB(v)		(((v) + 0x400000 - 1) & ~(0x400000 - 1))

#define PPC_REC_MAX_VIRTUAL_GPR		(40) // enough to store 32 GPRs + a few SPRs + temp registers (usually only 1-2)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32 ppcAddress;
|
||||
uint32 ppcSize;
|
||||
//void* x86Start;
|
||||
//size_t x86Size;
|
||||
void* storedRange;
|
||||
}ppcRecRange_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32 ppcAddress;
|
||||
uint32 ppcSize; // ppc code size of function
|
||||
void* x86Code; // pointer to x86 code
|
||||
size_t x86Size;
|
||||
std::vector<ppcRecRange_t> list_ranges;
|
||||
}PPCRecFunction_t;
|
||||
|
||||
// bit flags for IML instructions
#define PPCREC_IML_OP_FLAG_SIGNEXTEND		(1 << 0)
#define PPCREC_IML_OP_FLAG_SWITCHENDIAN		(1 << 1)
#define PPCREC_IML_OP_FLAG_NOT_EXPANDED		(1 << 2) // set single-precision load instructions to indicate that the value should not be rounded to double-precision
#define PPCREC_IML_OP_FLAG_UNUSED			(1 << 7) // used to mark instructions that are not used
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint8 type;
|
||||
uint8 operation;
|
||||
uint8 crRegister; // set to 0xFF if not set, not all IML instruction types support cr.
|
||||
uint8 crMode; // only used when crRegister is valid, used to differentiate between various forms of condition flag set/clear behavior
|
||||
uint32 crIgnoreMask; // bit set for every respective CR bit that doesn't need to be updated
|
||||
uint32 associatedPPCAddress; // ppc address that is associated with this instruction
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint8 _padding[7];
|
||||
}padding;
|
||||
struct
|
||||
{
|
||||
// R (op) A [update cr* in mode *]
|
||||
uint8 registerResult;
|
||||
uint8 registerA;
|
||||
}op_r_r;
|
||||
struct
|
||||
{
|
||||
// R = A (op) B [update cr* in mode *]
|
||||
uint8 registerResult;
|
||||
uint8 registerA;
|
||||
uint8 registerB;
|
||||
}op_r_r_r;
|
||||
struct
|
||||
{
|
||||
// R = A (op) immS32 [update cr* in mode *]
|
||||
uint8 registerResult;
|
||||
uint8 registerA;
|
||||
sint32 immS32;
|
||||
}op_r_r_s32;
|
||||
struct
|
||||
{
|
||||
// R/F = NAME or NAME = R/F
|
||||
uint8 registerIndex;
|
||||
uint8 copyWidth;
|
||||
uint32 name;
|
||||
uint8 flags;
|
||||
}op_r_name;
|
||||
struct
|
||||
{
|
||||
// R (op) s32 [update cr* in mode *]
|
||||
uint8 registerIndex;
|
||||
sint32 immS32;
|
||||
}op_r_immS32;
|
||||
struct
|
||||
{
|
||||
uint32 address;
|
||||
uint8 flags;
|
||||
}op_jumpmark;
|
||||
struct
|
||||
{
|
||||
uint32 param;
|
||||
uint32 param2;
|
||||
uint16 paramU16;
|
||||
}op_macro;
|
||||
struct
|
||||
{
|
||||
uint32 jumpmarkAddress;
|
||||
bool jumpAccordingToSegment; //PPCRecImlSegment_t* destinationSegment; // if set, this replaces jumpmarkAddress
|
||||
uint8 condition; // only used when crRegisterIndex is 8 or above (update: Apparently only used to mark jumps without a condition? -> Cleanup)
|
||||
uint8 crRegisterIndex;
|
||||
uint8 crBitIndex;
|
||||
bool bitMustBeSet;
|
||||
}op_conditionalJump;
|
||||
struct
|
||||
{
|
||||
uint8 registerData;
|
||||
uint8 registerMem;
|
||||
uint8 registerMem2;
|
||||
uint8 registerGQR;
|
||||
uint8 copyWidth;
|
||||
//uint8 flags;
|
||||
struct
|
||||
{
|
||||
bool swapEndian : 1;
|
||||
bool signExtend : 1;
|
||||
bool notExpanded : 1; // for floats
|
||||
}flags2;
|
||||
uint8 mode; // transfer mode (copy width, ps0/ps1 behavior)
|
||||
sint32 immS32;
|
||||
}op_storeLoad;
|
||||
struct
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint8 registerMem;
|
||||
sint32 immS32;
|
||||
}src;
|
||||
struct
|
||||
{
|
||||
uint8 registerMem;
|
||||
sint32 immS32;
|
||||
}dst;
|
||||
uint8 copyWidth;
|
||||
}op_mem2mem;
|
||||
struct
|
||||
{
|
||||
uint8 registerResult;
|
||||
uint8 registerOperand;
|
||||
uint8 flags;
|
||||
}op_fpr_r_r;
|
||||
struct
|
||||
{
|
||||
uint8 registerResult;
|
||||
uint8 registerOperandA;
|
||||
uint8 registerOperandB;
|
||||
uint8 flags;
|
||||
}op_fpr_r_r_r;
|
||||
struct
|
||||
{
|
||||
uint8 registerResult;
|
||||
uint8 registerOperandA;
|
||||
uint8 registerOperandB;
|
||||
uint8 registerOperandC;
|
||||
uint8 flags;
|
||||
}op_fpr_r_r_r_r;
|
||||
struct
|
||||
{
|
||||
uint8 registerResult;
|
||||
//uint8 flags;
|
||||
}op_fpr_r;
|
||||
struct
|
||||
{
|
||||
uint32 ppcAddress;
|
||||
uint32 x64Offset;
|
||||
}op_ppcEnter;
|
||||
struct
|
||||
{
|
||||
uint8 crD; // crBitIndex (result)
|
||||
uint8 crA; // crBitIndex
|
||||
uint8 crB; // crBitIndex
|
||||
}op_cr;
|
||||
// conditional operations (emitted if supported by target platform)
|
||||
struct
|
||||
{
|
||||
// r_s32
|
||||
uint8 registerIndex;
|
||||
sint32 immS32;
|
||||
// condition
|
||||
uint8 crRegisterIndex;
|
||||
uint8 crBitIndex;
|
||||
bool bitMustBeSet;
|
||||
}op_conditional_r_s32;
|
||||
};
|
||||
}PPCRecImlInstruction_t;
|
||||
|
||||
typedef struct _PPCRecImlSegment_t PPCRecImlSegment_t;
|
||||
|
||||
typedef struct _ppcRecompilerSegmentPoint_t
|
||||
{
|
||||
sint32 index;
|
||||
PPCRecImlSegment_t* imlSegment;
|
||||
_ppcRecompilerSegmentPoint_t* next;
|
||||
_ppcRecompilerSegmentPoint_t* prev;
|
||||
}ppcRecompilerSegmentPoint_t;
|
||||
|
||||
struct raLivenessLocation_t
|
||||
{
|
||||
sint32 index;
|
||||
bool isRead;
|
||||
bool isWrite;
|
||||
|
||||
raLivenessLocation_t() {};
|
||||
|
||||
raLivenessLocation_t(sint32 index, bool isRead, bool isWrite)
|
||||
: index(index), isRead(isRead), isWrite(isWrite) {};
|
||||
};
|
||||
|
||||
// Intrusive doubly-linked list node used to chain liveness subranges
struct raLivenessSubrangeLink_t
{
	struct raLivenessSubrange_t* prev;
	struct raLivenessSubrange_t* next;
};
|
||||
|
||||
struct raLivenessSubrange_t
|
||||
{
|
||||
struct raLivenessRange_t* range;
|
||||
PPCRecImlSegment_t* imlSegment;
|
||||
ppcRecompilerSegmentPoint_t start;
|
||||
ppcRecompilerSegmentPoint_t end;
|
||||
// dirty state tracking
|
||||
bool _noLoad;
|
||||
bool hasStore;
|
||||
bool hasStoreDelayed;
|
||||
// next
|
||||
raLivenessSubrange_t* subrangeBranchTaken;
|
||||
raLivenessSubrange_t* subrangeBranchNotTaken;
|
||||
// processing
|
||||
uint32 lastIterationIndex;
|
||||
// instruction locations
|
||||
std::vector<raLivenessLocation_t> list_locations;
|
||||
// linked list (subranges with same GPR virtual register)
|
||||
raLivenessSubrangeLink_t link_sameVirtualRegisterGPR;
|
||||
// linked list (all subranges for this segment)
|
||||
raLivenessSubrangeLink_t link_segmentSubrangesGPR;
|
||||
};
|
||||
|
||||
struct raLivenessRange_t
|
||||
{
|
||||
sint32 virtualRegister;
|
||||
sint32 physicalRegister;
|
||||
sint32 name;
|
||||
std::vector<raLivenessSubrange_t*> list_subranges;
|
||||
};
|
||||
|
||||
struct PPCSegmentRegisterAllocatorInfo_t
|
||||
{
|
||||
// analyzer stage
|
||||
bool isPartOfProcessedLoop{}; // used during loop detection
|
||||
sint32 lastIterationIndex{};
|
||||
// linked lists
|
||||
raLivenessSubrange_t* linkedList_allSubranges{};
|
||||
raLivenessSubrange_t* linkedList_perVirtualGPR[PPC_REC_MAX_VIRTUAL_GPR]{};
|
||||
};
|
||||
|
||||
struct PPCRecVGPRDistances_t
|
||||
{
|
||||
struct _RegArrayEntry
|
||||
{
|
||||
sint32 usageStart{};
|
||||
sint32 usageEnd{};
|
||||
}reg[PPC_REC_MAX_VIRTUAL_GPR];
|
||||
bool isProcessed[PPC_REC_MAX_VIRTUAL_GPR]{};
|
||||
};
|
||||
|
||||
typedef struct _PPCRecImlSegment_t
|
||||
{
|
||||
sint32 momentaryIndex{}; // index in segment list, generally not kept up to date except if needed (necessary for loop detection)
|
||||
sint32 startOffset{}; // offset to first instruction in iml instruction list
|
||||
sint32 count{}; // number of instructions in segment
|
||||
uint32 ppcAddress{}; // ppc address (0xFFFFFFFF if not associated with an address)
|
||||
uint32 x64Offset{}; // x64 code offset of segment start
|
||||
uint32 cycleCount{}; // number of PPC cycles required to execute this segment (roughly)
|
||||
// list of intermediate instructions in this segment
|
||||
PPCRecImlInstruction_t* imlList{};
|
||||
sint32 imlListSize{};
|
||||
sint32 imlListCount{};
|
||||
// segment link
|
||||
_PPCRecImlSegment_t* nextSegmentBranchNotTaken{}; // this is also the default for segments where there is no branch
|
||||
_PPCRecImlSegment_t* nextSegmentBranchTaken{};
|
||||
bool nextSegmentIsUncertain{};
|
||||
sint32 loopDepth{};
|
||||
//sList_t* list_prevSegments;
|
||||
std::vector<_PPCRecImlSegment_t*> list_prevSegments{};
|
||||
// PPC range of segment
|
||||
uint32 ppcAddrMin{};
|
||||
uint32 ppcAddrMax{};
|
||||
// enterable segments
|
||||
bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary)
|
||||
uint32 enterPPCAddress{}; // used if isEnterable is true
|
||||
// jump destination segments
|
||||
bool isJumpDestination{}; // segment is a destination for one or more (conditional) jumps
|
||||
uint32 jumpDestinationPPCAddress{};
|
||||
// PPC FPR use mask
|
||||
bool ppcFPRUsed[32]{}; // same as ppcGPRUsed, but for FPR
|
||||
// CR use mask
|
||||
uint32 crBitsInput{}; // bits that are expected to be set from the previous segment (read in this segment but not overwritten)
|
||||
uint32 crBitsRead{}; // all bits that are read in this segment
|
||||
uint32 crBitsWritten{}; // bits that are written in this segment
|
||||
// register allocator info
|
||||
PPCSegmentRegisterAllocatorInfo_t raInfo{};
|
||||
PPCRecVGPRDistances_t raDistances{};
|
||||
bool raRangeExtendProcessed{};
|
||||
// segment points
|
||||
ppcRecompilerSegmentPoint_t* segmentPointList{};
|
||||
}PPCRecImlSegment_t;
|
||||
|
||||
struct ppcImlGenContext_t
|
||||
{
|
||||
PPCRecFunction_t* functionRef;
|
||||
uint32* currentInstruction;
|
||||
uint32 ppcAddressOfCurrentInstruction;
|
||||
// fpr mode
|
||||
bool LSQE{ true };
|
||||
bool PSE{ true };
|
||||
// cycle counter
|
||||
uint32 cyclesSinceLastBranch; // used to track ppc cycles
|
||||
// temporary general purpose registers
|
||||
uint32 mappedRegister[PPC_REC_MAX_VIRTUAL_GPR];
|
||||
// temporary floating point registers (single and double precision)
|
||||
uint32 mappedFPRRegister[256];
|
||||
// list of intermediate instructions
|
||||
PPCRecImlInstruction_t* imlList;
|
||||
sint32 imlListSize;
|
||||
sint32 imlListCount;
|
||||
// list of segments
|
||||
PPCRecImlSegment_t** segmentList;
|
||||
sint32 segmentListSize;
|
||||
sint32 segmentListCount;
|
||||
// code generation control
|
||||
bool hasFPUInstruction; // if true, PPCEnter macro will create FP_UNAVAIL checks -> Not needed in user mode
|
||||
// register allocator info
|
||||
struct
|
||||
{
|
||||
std::vector<raLivenessRange_t*> list_ranges;
|
||||
}raInfo;
|
||||
// analysis info
|
||||
struct
|
||||
{
|
||||
bool modifiesGQR[8];
|
||||
}tracking;
|
||||
};
|
||||
|
||||
typedef void ATTR_MS_ABI (*PPCREC_JUMP_ENTRY)();
|
||||
|
||||
typedef struct
|
||||
{
|
||||
PPCRecFunction_t* ppcRecompilerFuncTable[PPC_REC_ALIGN_TO_4MB(PPC_REC_CODE_AREA_SIZE/4)]; // one virtual-function pointer for each potential ppc instruction
|
||||
PPCREC_JUMP_ENTRY ppcRecompilerDirectJumpTable[PPC_REC_ALIGN_TO_4MB(PPC_REC_CODE_AREA_SIZE/4)]; // lookup table for ppc offset to native code function
|
||||
// x64 data
|
||||
uint64 __declspec(align(16)) _x64XMM_xorNegateMaskBottom[2];
|
||||
uint64 __declspec(align(16)) _x64XMM_xorNegateMaskPair[2];
|
||||
uint64 __declspec(align(16)) _x64XMM_xorNOTMask[2];
|
||||
uint64 __declspec(align(16)) _x64XMM_andAbsMaskBottom[2];
|
||||
uint64 __declspec(align(16)) _x64XMM_andAbsMaskPair[2];
|
||||
uint32 __declspec(align(16)) _x64XMM_andFloatAbsMaskBottom[4];
|
||||
uint64 __declspec(align(16)) _x64XMM_singleWordMask[2];
|
||||
double __declspec(align(16)) _x64XMM_constDouble1_1[2];
|
||||
double __declspec(align(16)) _x64XMM_constDouble0_0[2];
|
||||
float __declspec(align(16)) _x64XMM_constFloat0_0[2];
|
||||
float __declspec(align(16)) _x64XMM_constFloat1_1[2];
|
||||
float __declspec(align(16)) _x64XMM_constFloatMin[2];
|
||||
uint32 __declspec(align(16)) _x64XMM_flushDenormalMask1[4];
|
||||
uint32 __declspec(align(16)) _x64XMM_flushDenormalMaskResetSignBits[4];
|
||||
// PSQ load/store scale tables
|
||||
double _psq_ld_scale_ps0_ps1[64 * 2];
|
||||
double _psq_ld_scale_ps0_1[64 * 2];
|
||||
double _psq_st_scale_ps0_ps1[64 * 2];
|
||||
double _psq_st_scale_ps0_1[64 * 2];
|
||||
// MXCSR
|
||||
uint32 _x64XMM_mxCsr_ftzOn;
|
||||
uint32 _x64XMM_mxCsr_ftzOff;
|
||||
}PPCRecompilerInstanceData_t;
|
||||
|
||||
extern __declspec(dllexport) PPCRecompilerInstanceData_t* ppcRecompilerInstanceData;
|
||||
extern bool ppcRecompilerEnabled;
|
||||
|
||||
__declspec(dllexport) void PPCRecompiler_init();
|
||||
|
||||
void PPCRecompiler_allocateRange(uint32 startAddress, uint32 size);
|
||||
|
||||
DLLEXPORT void PPCRecompiler_invalidateRange(uint32 startAddr, uint32 endAddr);
|
||||
|
||||
extern void ATTR_MS_ABI (*PPCRecompiler_enterRecompilerCode)(uint64 codeMem, uint64 ppcInterpreterInstance);
|
||||
extern void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_visited)();
|
||||
extern void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_unvisited)();
|
||||
|
||||
#define PPC_REC_INVALID_FUNCTION ((PPCRecFunction_t*)-1)
|
||||
|
||||
// CPUID
|
||||
extern __declspec(dllexport) bool hasLZCNTSupport;
|
||||
extern __declspec(dllexport) bool hasMOVBESupport;
|
||||
extern __declspec(dllexport) bool hasBMI2Support;
|
||||
extern __declspec(dllexport) bool hasAVXSupport;
|
||||
|
||||
// todo - move some of the stuff above into PPCRecompilerInternal.h
|
||||
|
||||
// recompiler interface
|
||||
|
||||
void PPCRecompiler_recompileIfUnvisited(uint32 enterAddress);
|
||||
void PPCRecompiler_attemptEnter(struct PPCInterpreter_t* hCPU, uint32 enterAddress);
|
||||
void PPCRecompiler_attemptEnterWithoutRecompile(struct PPCInterpreter_t* hCPU, uint32 enterAddress);
|
422
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h
Normal file
422
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h
Normal file
|
@ -0,0 +1,422 @@
|
|||
|
||||
// there are only 8 cr registers (0-7) we use the 8th as temporary cr register that is never stored (BDNZ instruction for example)
#define PPCREC_CR_REG_TEMP	8
|
||||
|
||||
// IML operations. Values are assigned sequentially starting at 0, so the order of
// the enumerators must not be changed.
enum
{
	PPCREC_IML_OP_ASSIGN,					// '=' operator
	PPCREC_IML_OP_ENDIAN_SWAP,				// '=' operator with 32bit endian swap
	PPCREC_IML_OP_ADD,						// '+' operator
	PPCREC_IML_OP_SUB,						// '-' operator
	PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY,	// complex operation, result = operand + ~operand2 + carry bit, updates carry bit
	PPCREC_IML_OP_COMPARE_SIGNED,			// arithmetic/signed comparison operator (updates cr)
	PPCREC_IML_OP_COMPARE_UNSIGNED,			// logical/unsigned comparison operator (updates cr)
	PPCREC_IML_OP_MULTIPLY_SIGNED,			// '*' operator (signed multiply)
	PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED,	// unsigned 64bit multiply, store only high 32bit-word of result
	PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED,		// signed 64bit multiply, store only high 32bit-word of result
	PPCREC_IML_OP_DIVIDE_SIGNED,			// '/' operator (signed divide)
	PPCREC_IML_OP_DIVIDE_UNSIGNED,			// '/' operator (unsigned divide)
	PPCREC_IML_OP_ADD_CARRY,				// complex operation, result = operand + carry bit, updates carry bit
	PPCREC_IML_OP_ADD_CARRY_ME,				// complex operation, result = operand + carry bit + (-1), updates carry bit
	PPCREC_IML_OP_ADD_UPDATE_CARRY,			// '+' operator but also updates carry flag
	PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY,	// '+' operator and also adds carry, updates carry flag
	// assign operators with cast
	PPCREC_IML_OP_ASSIGN_S16_TO_S32,		// copy 16bit and sign extend
	PPCREC_IML_OP_ASSIGN_S8_TO_S32,			// copy 8bit and sign extend
	// binary operation
	PPCREC_IML_OP_OR,						// '|' operator
	PPCREC_IML_OP_ORC,						// '|' operator, second operand is complemented first
	PPCREC_IML_OP_AND,						// '&' operator
	PPCREC_IML_OP_XOR,						// '^' operator
	PPCREC_IML_OP_LEFT_ROTATE,				// left rotate operator
	PPCREC_IML_OP_LEFT_SHIFT,				// shift left operator
	PPCREC_IML_OP_RIGHT_SHIFT,				// right shift operator (unsigned)
	PPCREC_IML_OP_NOT,						// complement each bit
	PPCREC_IML_OP_NEG,						// negate
	// ppc
	PPCREC_IML_OP_RLWIMI,					// RLWIMI instruction (rotate, merge based on mask)
	PPCREC_IML_OP_SRAW,						// SRAWI/SRAW instruction (algebraic shift right, sets ca flag)
	PPCREC_IML_OP_SLW,						// SLW (shift based on register by up to 63 bits)
	PPCREC_IML_OP_SRW,						// SRW (shift based on register by up to 63 bits)
	PPCREC_IML_OP_CNTLZW,
	PPCREC_IML_OP_SUBFC,					// SUBFC and SUBFIC (subtract from and set carry)
	PPCREC_IML_OP_DCBZ,						// clear 32 bytes aligned to 0x20
	PPCREC_IML_OP_MFCR,						// copy cr to gpr
	PPCREC_IML_OP_MTCRF,					// copy gpr to cr (with mask)
	// condition register
	PPCREC_IML_OP_CR_CLEAR,					// clear cr bit
	PPCREC_IML_OP_CR_SET,					// set cr bit
	PPCREC_IML_OP_CR_OR,					// OR cr bits
	PPCREC_IML_OP_CR_ORC,					// OR cr bits, complement second input operand bit first
	PPCREC_IML_OP_CR_AND,					// AND cr bits
	PPCREC_IML_OP_CR_ANDC,					// AND cr bits, complement second input operand bit first
	// FPU
	PPCREC_IML_OP_FPR_ADD_BOTTOM,
	PPCREC_IML_OP_FPR_ADD_PAIR,
	PPCREC_IML_OP_FPR_SUB_PAIR,
	PPCREC_IML_OP_FPR_SUB_BOTTOM,
	PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM,
	PPCREC_IML_OP_FPR_MULTIPLY_PAIR,
	PPCREC_IML_OP_FPR_DIVIDE_BOTTOM,
	PPCREC_IML_OP_FPR_DIVIDE_PAIR,
	PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP,
	PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP,
	PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM,
	PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP,	// leave bottom of destination untouched
	PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP,		// leave bottom of destination untouched
	PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM,	// leave top of destination untouched
	PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED,
	PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64,	// expand bottom f32 to f64 in bottom and top half
	PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP,			// calculate reciprocal with Espresso accuracy of source bottom half and write result to destination bottom and top half
	PPCREC_IML_OP_FPR_FCMPO_BOTTOM,
	PPCREC_IML_OP_FPR_FCMPU_BOTTOM,
	PPCREC_IML_OP_FPR_FCMPU_TOP,
	PPCREC_IML_OP_FPR_NEGATE_BOTTOM,
	PPCREC_IML_OP_FPR_NEGATE_PAIR,
	PPCREC_IML_OP_FPR_ABS_BOTTOM,			// abs(fp0)
	PPCREC_IML_OP_FPR_ABS_PAIR,
	PPCREC_IML_OP_FPR_FRES_PAIR,			// 1.0/fp approx (Espresso accuracy)
	PPCREC_IML_OP_FPR_FRSQRTE_PAIR,			// 1.0/sqrt(fp) approx (Espresso accuracy)
	PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM,	// -abs(fp0)
	PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM,	// round 64bit double to 64bit double with 32bit float precision (in bottom half of xmm register)
	PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR,	// round two 64bit doubles to 64bit double with 32bit float precision
	PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT,
	PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ,
	PPCREC_IML_OP_FPR_SELECT_BOTTOM,		// selectively copy bottom value from operand B or C based on value in operand A
	PPCREC_IML_OP_FPR_SELECT_PAIR,			// selectively copy top/bottom from operand B or C based on value in top/bottom of operand A
	// PS
	PPCREC_IML_OP_FPR_SUM0,
	PPCREC_IML_OP_FPR_SUM1,
};

// copying a full FPR pair reuses the plain assign operation
#define PPCREC_IML_OP_FPR_COPY_PAIR	(PPCREC_IML_OP_ASSIGN)
|
||||
|
||||
// IML macro operations (sequential values starting at 0)
enum
{
	PPCREC_IML_MACRO_BLR,			// macro for BLR instruction code
	PPCREC_IML_MACRO_BLRL,			// macro for BLRL instruction code
	PPCREC_IML_MACRO_BCTR,			// macro for BCTR instruction code
	PPCREC_IML_MACRO_BCTRL,			// macro for BCTRL instruction code
	PPCREC_IML_MACRO_BL,			// call to different function (can be within same function)
	PPCREC_IML_MACRO_B_FAR,			// branch to different function
	PPCREC_IML_MACRO_COUNT_CYCLES,	// decrease current remaining thread cycles by a certain amount
	PPCREC_IML_MACRO_HLE,			// HLE function call
	PPCREC_IML_MACRO_MFTB,			// get TB register value (low or high)
	PPCREC_IML_MACRO_LEAVE,			// leaves recompiler and switches to interpreter
	// debugging
	PPCREC_IML_MACRO_DEBUGBREAK,	// throws a debugbreak
};
|
||||
|
||||
// conditions for conditional jumps (sequential values starting at 0)
enum
{
	PPCREC_JUMP_CONDITION_NONE,
	PPCREC_JUMP_CONDITION_E,	// equal / zero
	PPCREC_JUMP_CONDITION_NE,	// not equal / not zero
	PPCREC_JUMP_CONDITION_LE,	// less or equal
	PPCREC_JUMP_CONDITION_L,	// less
	PPCREC_JUMP_CONDITION_GE,	// greater or equal
	PPCREC_JUMP_CONDITION_G,	// greater
	// special case:
	PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW,	// needs special handling
	PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW,	// not summaryoverflow
};
|
||||
|
||||
// modes describing how an instruction updates a condition register
enum
{
	PPCREC_CR_MODE_COMPARE_SIGNED,
	PPCREC_CR_MODE_COMPARE_UNSIGNED,	// alias logic compare
	// others:
	PPCREC_CR_MODE_ARITHMETIC,			// arithmetic use (for use with add/sub instructions without generating extra code)
	PPCREC_CR_MODE_LOGICAL,
};
|
||||
|
||||
// IML instruction types (sequential values starting at 0)
enum
{
	PPCREC_IML_TYPE_NONE,
	PPCREC_IML_TYPE_NO_OP,				// no-op instruction
	PPCREC_IML_TYPE_JUMPMARK,			// possible jump destination (generated before each ppc instruction)
	PPCREC_IML_TYPE_R_R,				// r* (op) *r
	PPCREC_IML_TYPE_R_R_R,				// r* = r* (op) r*
	PPCREC_IML_TYPE_R_R_S32,			// r* = r* (op) s32*
	PPCREC_IML_TYPE_LOAD,				// r* = [r*+s32*]
	PPCREC_IML_TYPE_LOAD_INDEXED,		// r* = [r*+r*]
	PPCREC_IML_TYPE_STORE,				// [r*+s32*] = r*
	PPCREC_IML_TYPE_STORE_INDEXED,		// [r*+r*] = r*
	PPCREC_IML_TYPE_R_NAME,				// r* = name
	PPCREC_IML_TYPE_NAME_R,				// name* = r*
	PPCREC_IML_TYPE_R_S32,				// r* (op) imm
	PPCREC_IML_TYPE_MACRO,
	PPCREC_IML_TYPE_CJUMP,				// conditional jump
	PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK,	// jumps only if remaining thread cycles >= 0
	PPCREC_IML_TYPE_PPC_ENTER,			// used to mark locations that should be written to recompilerCallTable
	PPCREC_IML_TYPE_CR,					// condition register specific operations (one or more operands)
	// conditional
	PPCREC_IML_TYPE_CONDITIONAL_R_S32,
	// FPR
	// NOTE(review): the next two comments look swapped compared to R_NAME / NAME_R above - confirm
	PPCREC_IML_TYPE_FPR_R_NAME,			// name = f*
	PPCREC_IML_TYPE_FPR_NAME_R,			// f* = name
	PPCREC_IML_TYPE_FPR_LOAD,			// r* = (bitdepth) [r*+s32*] (single or paired single mode)
	PPCREC_IML_TYPE_FPR_LOAD_INDEXED,	// r* = (bitdepth) [r*+r*] (single or paired single mode)
	PPCREC_IML_TYPE_FPR_STORE,			// (bitdepth) [r*+s32*] = r* (single or paired single mode)
	PPCREC_IML_TYPE_FPR_STORE_INDEXED,	// (bitdepth) [r*+r*] = r* (single or paired single mode)
	PPCREC_IML_TYPE_FPR_R_R,
	PPCREC_IML_TYPE_FPR_R_R_R,
	PPCREC_IML_TYPE_FPR_R_R_R_R,
	PPCREC_IML_TYPE_FPR_R,
	// special
	PPCREC_IML_TYPE_MEM2MEM,			// memory to memory copy (deprecated)
};
|
||||
|
||||
// base values of the register name ranges
enum
{
	PPCREC_NAME_NONE,
	PPCREC_NAME_TEMPORARY,
	PPCREC_NAME_R0 = 1000,
	PPCREC_NAME_SPR0 = 2000,
	PPCREC_NAME_FPR0 = 3000,
	PPCREC_NAME_TEMPORARY_FPR0 = 4000, // 0 to 7
	// (disabled) PPCREC_NAME_CR0 = 3000, value mapped condition register (usually it isn't needed and can be optimized away)
};
|
||||
|
||||
// special cases for LOAD/STORE
#define PPC_REC_LOAD_LWARX_MARKER	(100) // lwarx instruction (similar to LWZX but sets reserved address/value)
#define PPC_REC_STORE_STWCX_MARKER	(100) // stwcx instruction (similar to STWX but writes only if reservation from LWARX is valid)
#define PPC_REC_STORE_STSWI_1		(200) // stswi nb = 1
#define PPC_REC_STORE_STSWI_2		(201) // stswi nb = 2
#define PPC_REC_STORE_STSWI_3		(202) // stswi nb = 3
#define PPC_REC_STORE_LSWI_1		(200) // lswi nb = 1
#define PPC_REC_STORE_LSWI_2		(201) // lswi nb = 2
#define PPC_REC_STORE_LSWI_3		(202) // lswi nb = 3

// register index meaning "no register"
#define PPC_REC_INVALID_REGISTER	0xFF

// bit indices within a condition register field
#define PPCREC_CR_BIT_LT	0
#define PPCREC_CR_BIT_GT	1
#define PPCREC_CR_BIT_EQ	2
#define PPCREC_CR_BIT_SO	3
|
||||
|
||||
// transfer modes for FPR loads and stores (one shared sequence of values starting at 0)
enum
{
	// fpr load
	PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0,
	PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1,
	PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0,
	PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0,
	PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1,
	PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0,
	PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1,
	PPCREC_FPR_LD_MODE_PSQ_S16_PS0,
	PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1,
	PPCREC_FPR_LD_MODE_PSQ_U16_PS0,
	PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1,
	PPCREC_FPR_LD_MODE_PSQ_S8_PS0,
	PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1,
	PPCREC_FPR_LD_MODE_PSQ_U8_PS0,
	PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1,
	// fpr store
	PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0,	// store 1 single precision float from ps0
	PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0,	// store 1 double precision float from ps0

	PPCREC_FPR_ST_MODE_UI32_FROM_PS0,	// store raw low-32bit of PS0

	PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1,
	PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0,
	PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1,
	PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0,
	PPCREC_FPR_ST_MODE_PSQ_S8_PS0,
	PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1,
	PPCREC_FPR_ST_MODE_PSQ_U8_PS0,
	PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1,
	PPCREC_FPR_ST_MODE_PSQ_U16_PS0,
	PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1,
	PPCREC_FPR_ST_MODE_PSQ_S16_PS0,
	PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1,
};
|
||||
|
||||
bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set<uint32>& entryAddresses);
|
||||
void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext); // todo - move to destructor
|
||||
|
||||
PPCRecImlInstruction_t* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext);
|
||||
void PPCRecompiler_pushBackIMLInstructions(PPCRecImlSegment_t* imlSegment, sint32 index, sint32 shiftBackCount);
|
||||
PPCRecImlInstruction_t* PPCRecompiler_insertInstruction(PPCRecImlSegment_t* imlSegment, sint32 index);
|
||||
|
||||
void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint32 index, sint32 count);
|
||||
|
||||
void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint, PPCRecImlSegment_t* imlSegment, sint32 index);
|
||||
void PPCRecompilerIml_removeSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint);
|
||||
|
||||
// GPR register management
|
||||
uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false);
|
||||
uint32 PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);
|
||||
|
||||
// FPR register management
|
||||
uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false);
|
||||
uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);
|
||||
|
||||
// IML instruction generation
|
||||
void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 jumpmarkAddress);
|
||||
void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction);
|
||||
|
||||
void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 copyWidth, bool signExtend, bool bigEndian, uint8 crRegister, uint32 crMode);
|
||||
void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet);
|
||||
void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0);
|
||||
|
||||
|
||||
|
||||
// IML instruction generation (new style, can generate new instructions but also overwrite existing ones)
|
||||
|
||||
void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction);
|
||||
void PPCRecompilerImlGen_generateNewInstruction_memory_memory(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint8 srcMemReg, sint32 srcImmS32, uint8 dstMemReg, sint32 dstImmS32, uint8 copyWidth);
|
||||
|
||||
void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister = PPC_REC_INVALID_REGISTER);
|
||||
|
||||
// IML generation - FPU
|
||||
bool PPCRecompilerImlGen_LFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_LFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_LFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_LFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_LFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_LFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_STFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_STFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_STFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_STFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_STFIWX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_STFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_STFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FMUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FDIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FNMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FMULS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FDIVS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FMADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FNMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FCMPO(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FCMPU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FMR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FNABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FRES(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FRSP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FNEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FSEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FCTIWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_SUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_DIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_ABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_RES(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_RSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_SEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MERGE00(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MERGE01(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MERGE10(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MERGE11(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
|
||||
// IML general
|
||||
|
||||
bool PPCRecompiler_isSuffixInstruction(PPCRecImlInstruction_t* iml);
|
||||
void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext);
|
||||
void PPCRecompilerIml_setLinkBranchNotTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst);
|
||||
void PPCRecompilerIml_setLinkBranchTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst);
|
||||
void PPCRecompilerIML_relinkInputSegment(PPCRecImlSegment_t* imlSegmentOrig, PPCRecImlSegment_t* imlSegmentNew);
|
||||
void PPCRecompilerIML_removeLink(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst);
|
||||
void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext);
|
||||
|
||||
PPCRecImlInstruction_t* PPCRecompilerIML_getLastInstruction(PPCRecImlSegment_t* imlSegment);
|
||||
|
||||
// IML analyzer
|
||||
typedef struct
|
||||
{
|
||||
uint32 readCRBits;
|
||||
uint32 writtenCRBits;
|
||||
}PPCRecCRTracking_t;
|
||||
|
||||
bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment);
|
||||
bool PPCRecompilerImlAnalyzer_canTypeWriteCR(PPCRecImlInstruction_t* imlInstruction);
|
||||
void PPCRecompilerImlAnalyzer_getCRTracking(PPCRecImlInstruction_t* imlInstruction, PPCRecCRTracking_t* crTracking);
|
||||
|
||||
// IML optimizer
|
||||
bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext);
|
||||
|
||||
bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext);
|
||||
|
||||
void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext);
|
||||
void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext);
|
||||
void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext);
|
||||
|
||||
void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext);
|
||||
|
||||
// IML register allocator
|
||||
void PPCRecompilerImm_allocateRegisters(ppcImlGenContext_t* ppcImlGenContext);
|
||||
|
||||
// late optimizations
|
||||
void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext);
|
||||
|
||||
// debug
|
||||
|
||||
void PPCRecompiler_dumpIMLSegment(PPCRecImlSegment_t* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false);
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
sint16 readNamedReg1;
|
||||
sint16 readNamedReg2;
|
||||
sint16 readNamedReg3;
|
||||
sint16 writtenNamedReg1;
|
||||
};
|
||||
sint16 gpr[4]; // 3 read + 1 write
|
||||
};
|
||||
// FPR
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
// note: If destination operand is not fully written, it will be added as a read FPR as well
|
||||
sint16 readFPR1;
|
||||
sint16 readFPR2;
|
||||
sint16 readFPR3;
|
||||
sint16 readFPR4; // usually this is set to the result FPR if only partially overwritten
|
||||
sint16 writtenFPR1;
|
||||
};
|
||||
sint16 fpr[4];
|
||||
};
|
||||
}PPCImlOptimizerUsedRegisters_t;
|
||||
|
||||
void PPCRecompiler_checkRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, PPCImlOptimizerUsedRegisters_t* registersUsed);
|
137
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp
Normal file
137
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp
Normal file
|
@ -0,0 +1,137 @@
|
|||
#include "PPCRecompiler.h"
|
||||
#include "PPCRecompilerIml.h"
|
||||
#include "util/helpers/fixedSizeList.h"
|
||||
#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
|
||||
|
||||
/*
|
||||
* Initializes a single segment and returns true if it is a finite loop
|
||||
*/
|
||||
bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment)
|
||||
{
|
||||
bool isTightFiniteLoop = false;
|
||||
// base criteria, must jump to beginning of same segment
|
||||
if (imlSegment->nextSegmentBranchTaken != imlSegment)
|
||||
return false;
|
||||
// loops using BDNZ are assumed to always be finite
|
||||
for (sint32 t = 0; t < imlSegment->imlListCount; t++)
|
||||
{
|
||||
if (imlSegment->imlList[t].type == PPCREC_IML_TYPE_R_S32 && imlSegment->imlList[t].operation == PPCREC_IML_OP_SUB && imlSegment->imlList[t].crRegister == 8)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// for non-BDNZ loops, check for common patterns
|
||||
// risky approach, look for ADD/SUB operations and assume that potential overflow means finite (does not include r_r_s32 ADD/SUB)
|
||||
// this catches most loops with load-update and store-update instructions, but also those with decrementing counters
|
||||
FixedSizeList<sint32, 64, true> list_modifiedRegisters;
|
||||
for (sint32 t = 0; t < imlSegment->imlListCount; t++)
|
||||
{
|
||||
if (imlSegment->imlList[t].type == PPCREC_IML_TYPE_R_S32 && (imlSegment->imlList[t].operation == PPCREC_IML_OP_ADD || imlSegment->imlList[t].operation == PPCREC_IML_OP_SUB) )
|
||||
{
|
||||
list_modifiedRegisters.addUnique(imlSegment->imlList[t].op_r_immS32.registerIndex);
|
||||
}
|
||||
}
|
||||
if (list_modifiedRegisters.count > 0)
|
||||
{
|
||||
// remove all registers from the list that are modified by non-ADD/SUB instructions
|
||||
// todo: We should also cover the case where ADD+SUB on the same register cancel the effect out
|
||||
PPCImlOptimizerUsedRegisters_t registersUsed;
|
||||
for (sint32 t = 0; t < imlSegment->imlListCount; t++)
|
||||
{
|
||||
if (imlSegment->imlList[t].type == PPCREC_IML_TYPE_R_S32 && (imlSegment->imlList[t].operation == PPCREC_IML_OP_ADD || imlSegment->imlList[t].operation == PPCREC_IML_OP_SUB))
|
||||
continue;
|
||||
PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + t, ®istersUsed);
|
||||
if(registersUsed.writtenNamedReg1 < 0)
|
||||
continue;
|
||||
list_modifiedRegisters.remove(registersUsed.writtenNamedReg1);
|
||||
}
|
||||
if (list_modifiedRegisters.count > 0)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if the imlInstruction can overwrite CR (depending on value of ->crRegister)
|
||||
*/
|
||||
bool PPCRecompilerImlAnalyzer_canTypeWriteCR(PPCRecImlInstruction_t* imlInstruction)
|
||||
{
|
||||
if (imlInstruction->type == PPCREC_IML_TYPE_R_R)
|
||||
return true;
|
||||
if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R)
|
||||
return true;
|
||||
if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32)
|
||||
return true;
|
||||
if (imlInstruction->type == PPCREC_IML_TYPE_R_S32)
|
||||
return true;
|
||||
if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R)
|
||||
return true;
|
||||
if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R)
|
||||
return true;
|
||||
if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R)
|
||||
return true;
|
||||
if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Determines which CR bits an IML instruction reads and which it overwrites.
 *
 * crTracking is always reset first; afterwards readCRBits / writtenCRBits hold
 * one mask bit per CR bit, where CR bit (crRegisterIndex * 4 + crBitIndex) maps
 * to the mask bit with the same index.
 *
 * All masks are built from unsigned literals so that shifts reaching bit 31
 * (e.g. the last bit of CR7) never go through a signed int.
 */
void PPCRecompilerImlAnalyzer_getCRTracking(PPCRecImlInstruction_t* imlInstruction, PPCRecCRTracking_t* crTracking)
{
	crTracking->readCRBits = 0;
	crTracking->writtenCRBits = 0;
	if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP)
	{
		// an unconditional jump reads nothing, a conditional one reads exactly one CR bit
		if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE)
		{
			uint32 crBitFlag = 1u << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + imlInstruction->op_conditionalJump.crBitIndex);
			crTracking->readCRBits = (crBitFlag);
		}
	}
	else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
	{
		uint32 crBitFlag = 1u << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex);
		crTracking->readCRBits = crBitFlag;
	}
	else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR)
	{
		// MFCR reads the whole condition register
		crTracking->readCRBits = 0xFFFFFFFF;
	}
	else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF)
	{
		// the immediate holds the MTCRF field mask; the helper converts it to a CR bit mask
		crTracking->writtenCRBits |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32);
	}
	else if (imlInstruction->type == PPCREC_IML_TYPE_CR)
	{
		if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR ||
			imlInstruction->operation == PPCREC_IML_OP_CR_SET)
		{
			// writes crD without reading anything
			uint32 crBitFlag = 1u << (imlInstruction->op_cr.crD);
			crTracking->writtenCRBits = crBitFlag;
		}
		else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR ||
			imlInstruction->operation == PPCREC_IML_OP_CR_ORC ||
			imlInstruction->operation == PPCREC_IML_OP_CR_AND ||
			imlInstruction->operation == PPCREC_IML_OP_CR_ANDC)
		{
			// crD = crA <op> crB
			uint32 crBitFlag = 1u << (imlInstruction->op_cr.crD);
			crTracking->writtenCRBits = crBitFlag;
			crBitFlag = 1u << (imlInstruction->op_cr.crA);
			crTracking->readCRBits = crBitFlag;
			crBitFlag = 1u << (imlInstruction->op_cr.crB);
			crTracking->readCRBits |= crBitFlag;
		}
		else
			assert_dbg(); // CR operation not handled by the tracker
	}
	else if (PPCRecompilerImlAnalyzer_canTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7)
	{
		// instruction updates all four bits of the selected CR field
		crTracking->writtenCRBits |= (0xFu << (imlInstruction->crRegister * 4));
	}
	else if ((imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER)
	{
		// overwrites CR0
		crTracking->writtenCRBits |= (0xFu << 0);
	}
}
|
5026
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp
Normal file
5026
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp
Normal file
File diff suppressed because it is too large
Load diff
1926
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp
Normal file
1926
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp
Normal file
File diff suppressed because it is too large
Load diff
2175
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp
Normal file
2175
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp
Normal file
File diff suppressed because it is too large
Load diff
399
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp
Normal file
399
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp
Normal file
|
@ -0,0 +1,399 @@
|
|||
#include "PPCRecompiler.h"
|
||||
#include "PPCRecompilerIml.h"
|
||||
#include "PPCRecompilerX64.h"
|
||||
#include "PPCRecompilerImlRanges.h"
|
||||
#include "util/helpers/MemoryPool.h"
|
||||
|
||||
// Pushes subrange onto the front of the doubly linked list headed by *root,
// using the link_sameVirtualRegisterGPR links. In non-release builds it is
// asserted that the current head belongs to the same virtual register.
void PPCRecRARange_addLink_perVirtualGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange)
{
	raLivenessSubrange_t* const previousHead = *root;
#ifndef PUBLIC_RELEASE
	if (previousHead != nullptr && previousHead->range->virtualRegister != subrange->range->virtualRegister)
		assert_dbg();
#endif
	// the new node points forward to the old head ...
	subrange->link_sameVirtualRegisterGPR.next = previousHead;
	// ... and the old head (if any) points back to the new node
	if (previousHead != nullptr)
		previousHead->link_sameVirtualRegisterGPR.prev = subrange;
	subrange->link_sameVirtualRegisterGPR.prev = nullptr;
	*root = subrange;
}
|
||||
|
||||
// Pushes subrange onto the front of the doubly linked list headed by *root,
// using the link_segmentSubrangesGPR links.
void PPCRecRARange_addLink_allSubrangesGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange)
{
	raLivenessSubrange_t* const previousHead = *root;
	subrange->link_segmentSubrangesGPR.next = previousHead;
	if (previousHead != nullptr)
		previousHead->link_segmentSubrangesGPR.prev = subrange;
	subrange->link_segmentSubrangesGPR.prev = nullptr;
	*root = subrange;
}
|
||||
|
||||
// Unhooks subrange from the link_sameVirtualRegisterGPR list headed by *root.
// If subrange was the head, *root is advanced to its successor.
void PPCRecRARange_removeLink_perVirtualGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange)
{
	auto& ownLink = subrange->link_sameVirtualRegisterGPR;
	raLivenessSubrange_t* const predecessor = ownLink.prev;
	raLivenessSubrange_t* const successor = ownLink.next;
	if (predecessor != nullptr)
		predecessor->link_sameVirtualRegisterGPR.next = successor;
	else
		*root = successor; // no predecessor means subrange was the list head
	if (successor != nullptr)
		successor->link_sameVirtualRegisterGPR.prev = predecessor;
#ifndef PUBLIC_RELEASE
	// poison the stale links so accidental reuse is easy to spot while debugging
	ownLink.prev = (raLivenessSubrange_t*)1;
	ownLink.next = (raLivenessSubrange_t*)1;
#endif
}
|
||||
|
||||
// Unhooks subrange from the link_segmentSubrangesGPR list headed by *root.
// If subrange was the head, *root is advanced to its successor.
void PPCRecRARange_removeLink_allSubrangesGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange)
{
	auto& ownLink = subrange->link_segmentSubrangesGPR;
	raLivenessSubrange_t* const predecessor = ownLink.prev;
	raLivenessSubrange_t* const successor = ownLink.next;
	if (predecessor != nullptr)
		predecessor->link_segmentSubrangesGPR.next = successor;
	else
		*root = successor; // no predecessor means subrange was the list head
	if (successor != nullptr)
		successor->link_segmentSubrangesGPR.prev = predecessor;
#ifndef PUBLIC_RELEASE
	// poison the stale links so accidental reuse is easy to spot while debugging
	ownLink.prev = (raLivenessSubrange_t*)1;
	ownLink.next = (raLivenessSubrange_t*)1;
#endif
}
|
||||
|
||||
// Pool from which raLivenessRange_t objects are acquired (acquireObj) and to which they are returned (releaseObj).
// NOTE(review): the meaning of the 4096 constructor argument is not visible here - confirm in MemoryPool.h.
MemoryPoolPermanentObjects<raLivenessRange_t> memPool_livenessRange(4096);
|
||||
// Pool from which raLivenessSubrange_t objects are acquired (acquireObj) and to which they are returned (releaseObj).
// NOTE(review): the meaning of the 4096 constructor argument is not visible here - confirm in MemoryPool.h.
MemoryPoolPermanentObjects<raLivenessSubrange_t> memPool_livenessSubrange(4096);
|
||||
|
||||
// Takes a liveness range from the pool, initializes it for the given virtual
// register / name with no subranges and no physical register (-1), registers
// it in ppcImlGenContext->raInfo.list_ranges and returns it.
raLivenessRange_t* PPCRecRA_createRangeBase(ppcImlGenContext_t* ppcImlGenContext, uint32 virtualRegister, uint32 name)
{
	raLivenessRange_t* const newRange = memPool_livenessRange.acquireObj();
	// presumably the pool may hand out a previously used object, so start from an empty subrange list
	newRange->list_subranges.resize(0);
	newRange->physicalRegister = -1;
	newRange->name = name;
	newRange->virtualRegister = virtualRegister;
	ppcImlGenContext->raInfo.list_ranges.push_back(newRange);
	return newRange;
}
|
||||
|
||||
// Takes a subrange from the pool and sets it up to cover [startIndex, endIndex]
// of imlSegment as part of range. The new subrange starts without locations,
// without branch successors and with all flags cleared. It is appended to
// range->list_subranges and linked into both per-segment lists.
raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, PPCRecImlSegment_t* imlSegment, sint32 startIndex, sint32 endIndex)
{
	raLivenessSubrange_t* const newSubrange = memPool_livenessSubrange.acquireObj();
	newSubrange->list_locations.resize(0);
	newSubrange->range = range;
	newSubrange->imlSegment = imlSegment;
	PPCRecompilerIml_setSegmentPoint(&newSubrange->start, imlSegment, startIndex);
	PPCRecompilerIml_setSegmentPoint(&newSubrange->end, imlSegment, endIndex);

	// defaults
	newSubrange->hasStore = false;
	newSubrange->hasStoreDelayed = false;
	newSubrange->_noLoad = false;
	newSubrange->lastIterationIndex = 0;
	newSubrange->subrangeBranchNotTaken = nullptr;
	newSubrange->subrangeBranchTaken = nullptr;

	// register with the owning range
	range->list_subranges.push_back(newSubrange);

	// register with the segment (per-virtual-register list and list of all subranges)
	auto& segmentRaInfo = imlSegment->raInfo;
	PPCRecRARange_addLink_perVirtualGPR(&(segmentRaInfo.linkedList_perVirtualGPR[range->virtualRegister]), newSubrange);
	PPCRecRARange_addLink_allSubrangesGPR(&segmentRaInfo.linkedList_allSubranges, newSubrange);
	return newSubrange;
}
|
||||
|
||||
// Removes subrange from both linked lists maintained by its segment
// (the per-virtual-register list and the list of all subranges).
void _unlinkSubrange(raLivenessSubrange_t* subrange)
{
	auto& segmentRaInfo = subrange->imlSegment->raInfo;
	const auto virtualRegister = subrange->range->virtualRegister;
	PPCRecRARange_removeLink_perVirtualGPR(&segmentRaInfo.linkedList_perVirtualGPR[virtualRegister], subrange);
	PPCRecRARange_removeLink_allSubrangesGPR(&segmentRaInfo.linkedList_allSubranges, subrange);
}
|
||||
|
||||
// Fully removes a subrange: unlinks it from its segment's lists, removes it
// from the owning range's subrange list, drops its locations and segment
// points and hands the object back to the pool.
void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange)
{
	_unlinkSubrange(subrange);
	auto& ownerSubranges = subrange->range->list_subranges;
	const auto itr = std::find(ownerSubranges.begin(), ownerSubranges.end(), subrange);
	// the subrange is expected to be registered with its range; erasing end() would be undefined behavior
	cemu_assert_debug(itr != ownerSubranges.end());
	if (itr != ownerSubranges.end())
		ownerSubranges.erase(itr);
	subrange->list_locations.clear();
	PPCRecompilerIml_removeSegmentPoint(&subrange->start);
	PPCRecompilerIml_removeSegmentPoint(&subrange->end);
	memPool_livenessSubrange.releaseObj(subrange);
}
|
||||
|
||||
// Releases a subrange without touching the owning range's list_subranges:
// it is unlinked from the segment lists, its segment points are removed and
// the object goes back to the pool. The caller is responsible for the range's list.
void _PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange)
{
	_unlinkSubrange(subrange);
	auto* const startPoint = &subrange->start;
	auto* const endPoint = &subrange->end;
	PPCRecompilerIml_removeSegmentPoint(startPoint);
	PPCRecompilerIml_removeSegmentPoint(endPoint);
	memPool_livenessSubrange.releaseObj(subrange);
}
|
||||
|
||||
// Deletes a range together with all subranges it still owns, removes it from
// ppcImlGenContext->raInfo.list_ranges and returns it to the pool.
void PPCRecRA_deleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range)
{
	for (auto& subrange : range->list_subranges)
	{
		_PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext, subrange);
	}
	auto& allRanges = ppcImlGenContext->raInfo.list_ranges;
	const auto itr = std::find(allRanges.begin(), allRanges.end(), range);
	// the range is expected to be registered in the context; erasing end() would be undefined behavior
	cemu_assert_debug(itr != allRanges.end());
	if (itr != allRanges.end())
		allRanges.erase(itr);
	memPool_livenessRange.releaseObj(range);
}
|
||||
|
||||
// Same as PPCRecRA_deleteRange, except that the range is NOT removed from
// ppcImlGenContext->raInfo.list_ranges; the caller has to take care of that list.
void PPCRecRA_deleteRangeNoUnlink(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range)
{
	auto& ownedSubranges = range->list_subranges;
	for (auto itr = ownedSubranges.begin(); itr != ownedSubranges.end(); ++itr)
		_PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext, *itr);
	memPool_livenessRange.releaseObj(range);
}
|
||||
|
||||
// Deletes every range registered in the context (including their subranges)
// and leaves ppcImlGenContext->raInfo.list_ranges empty.
void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext)
{
	auto& allRanges = ppcImlGenContext->raInfo.list_ranges;
	// the list itself is emptied in one go afterwards, hence the "NoUnlink" variant
	for (auto itr = allRanges.begin(); itr != allRanges.end(); ++itr)
		PPCRecRA_deleteRangeNoUnlink(ppcImlGenContext, *itr);
	allRanges.clear();
}
|
||||
|
||||
// Moves every subrange of absorbedRange into range and then deletes absorbedRange.
// Both ranges must be distinct and refer to the same virtual register (checked in debug builds).
void PPCRecRA_mergeRanges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, raLivenessRange_t* absorbedRange)
{
	cemu_assert_debug(range != absorbedRange);
	cemu_assert_debug(range->virtualRegister == absorbedRange->virtualRegister);
	// re-parent all subranges
	auto& donatedSubranges = absorbedRange->list_subranges;
	for (auto& movedSubrange : donatedSubranges)
	{
		range->list_subranges.push_back(movedSubrange);
		movedSubrange->range = range;
	}
	// empty the list first so that deleting the absorbed range does not delete the moved subranges
	donatedSubranges.clear();
	PPCRecRA_deleteRange(ppcImlGenContext, absorbedRange);
}
|
||||
|
||||
// Appends absorbedSubrange to subrange and deletes absorbedSubrange.
// subrange takes over the branch successors, usage locations and end index of
// the absorbed subrange. Debug builds assert that both live in the same segment,
// that subrange does not end after absorbedSubrange starts, that subrange has
// no branch successors yet and that the two are distinct.
void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, raLivenessSubrange_t* absorbedSubrange)
{
#ifndef PUBLIC_RELEASE
	PPCRecRA_debugValidateSubrange(subrange);
	PPCRecRA_debugValidateSubrange(absorbedSubrange);
	if (subrange->imlSegment != absorbedSubrange->imlSegment)
		assert_dbg();
	if (subrange->end.index > absorbedSubrange->start.index)
		assert_dbg();
	if (subrange->subrangeBranchTaken || subrange->subrangeBranchNotTaken)
		assert_dbg();
	if (subrange == absorbedSubrange)
		assert_dbg();
#endif
	// inherit the outgoing connections of the absorbed part
	subrange->subrangeBranchTaken = absorbedSubrange->subrangeBranchTaken;
	subrange->subrangeBranchNotTaken = absorbedSubrange->subrangeBranchNotTaken;

	// merge usage locations
	auto& absorbedLocations = absorbedSubrange->list_locations;
	for (auto& usage : absorbedLocations)
		subrange->list_locations.push_back(usage);
	absorbedLocations.clear();

	// the merged subrange now reaches as far as the absorbed one did
	subrange->end.index = absorbedSubrange->end.index;

	PPCRecRA_debugValidateSubrange(subrange);

	PPCRecRA_deleteSubrange(ppcImlGenContext, absorbedSubrange);
}
|
||||
|
||||
// remove all inter-segment connections from the range and split it into local ranges (also removes empty ranges)
|
||||
void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range)
|
||||
{
|
||||
if (range->list_subranges.size() == 1)
|
||||
assert_dbg();
|
||||
for (auto& subrange : range->list_subranges)
|
||||
{
|
||||
if (subrange->list_locations.empty())
|
||||
continue;
|
||||
raLivenessRange_t* newRange = PPCRecRA_createRangeBase(ppcImlGenContext, range->virtualRegister, range->name);
|
||||
raLivenessSubrange_t* newSubrange = PPCRecRA_createSubrange(ppcImlGenContext, newRange, subrange->imlSegment, subrange->list_locations.data()[0].index, subrange->list_locations.data()[subrange->list_locations.size() - 1].index + 1);
|
||||
// copy locations
|
||||
for (auto& location : subrange->list_locations)
|
||||
{
|
||||
newSubrange->list_locations.push_back(location);
|
||||
}
|
||||
}
|
||||
// remove original range
|
||||
PPCRecRA_deleteRange(ppcImlGenContext, range);
|
||||
}
|
||||
|
||||
#ifndef PUBLIC_RELEASE
|
||||
void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange)
|
||||
{
|
||||
// validate subrange
|
||||
if (subrange->subrangeBranchTaken && subrange->subrangeBranchTaken->imlSegment != subrange->imlSegment->nextSegmentBranchTaken)
|
||||
assert_dbg();
|
||||
if (subrange->subrangeBranchNotTaken && subrange->subrangeBranchNotTaken->imlSegment != subrange->imlSegment->nextSegmentBranchNotTaken)
|
||||
assert_dbg();
|
||||
}
|
||||
#else
|
||||
void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange) {}
|
||||
#endif
|
||||
|
||||
// split subrange at the given index
|
||||
// After the split there will be two ranges/subranges:
|
||||
// head -> subrange is shortned to end at splitIndex
|
||||
// tail -> a new subrange that reaches from splitIndex to the end of the original subrange
|
||||
// if head has a physical register assigned it will not carry over to tail
|
||||
// The return value is the tail subrange
|
||||
// If trimToHole is true, the end of the head subrange and the start of the tail subrange will be moved to fit the locations
|
||||
// Ranges that begin at RA_INTER_RANGE_START are allowed and can be split
|
||||
raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, sint32 splitIndex, bool trimToHole)
|
||||
{
|
||||
// validation
|
||||
#ifndef PUBLIC_RELEASE
|
||||
if (subrange->end.index == RA_INTER_RANGE_END || subrange->end.index == RA_INTER_RANGE_START)
|
||||
assert_dbg();
|
||||
if (subrange->start.index >= splitIndex)
|
||||
assert_dbg();
|
||||
if (subrange->end.index <= splitIndex)
|
||||
assert_dbg();
|
||||
#endif
|
||||
// create tail
|
||||
raLivenessRange_t* tailRange = PPCRecRA_createRangeBase(ppcImlGenContext, subrange->range->virtualRegister, subrange->range->name);
|
||||
raLivenessSubrange_t* tailSubrange = PPCRecRA_createSubrange(ppcImlGenContext, tailRange, subrange->imlSegment, splitIndex, subrange->end.index);
|
||||
// copy locations
|
||||
for (auto& location : subrange->list_locations)
|
||||
{
|
||||
if (location.index >= splitIndex)
|
||||
tailSubrange->list_locations.push_back(location);
|
||||
}
|
||||
// remove tail locations from head
|
||||
for (sint32 i = 0; i < subrange->list_locations.size(); i++)
|
||||
{
|
||||
raLivenessLocation_t* location = subrange->list_locations.data() + i;
|
||||
if (location->index >= splitIndex)
|
||||
{
|
||||
subrange->list_locations.resize(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
// adjust start/end
|
||||
if (trimToHole)
|
||||
{
|
||||
if (subrange->list_locations.empty())
|
||||
{
|
||||
subrange->end.index = subrange->start.index+1;
|
||||
}
|
||||
else
|
||||
{
|
||||
subrange->end.index = subrange->list_locations.back().index + 1;
|
||||
}
|
||||
if (tailSubrange->list_locations.empty())
|
||||
{
|
||||
assert_dbg(); // should not happen? (In this case we can just avoid generating a tail at all)
|
||||
}
|
||||
else
|
||||
{
|
||||
tailSubrange->start.index = tailSubrange->list_locations.front().index;
|
||||
}
|
||||
}
|
||||
return tailSubrange;
|
||||
}
|
||||
|
||||
// Records an access at instruction 'index' in the location list of a subrange.
// If the newest entry already refers to the same index its read/write flags are OR-ed with the
// new ones, otherwise a new entry is appended. Indices are expected to arrive in
// non-decreasing order (checked by a debug assert).
void PPCRecRA_updateOrAddSubrangeLocation(raLivenessSubrange_t* subrange, sint32 index, bool isRead, bool isWrite)
{
	auto& locations = subrange->list_locations;
	if (!locations.empty())
	{
		auto& newest = locations.back();
		cemu_assert_debug(newest.index <= index);
		if (newest.index == index)
		{
			// same instruction again -> merge the access flags
			newest.isRead = newest.isRead || isRead;
			newest.isWrite = newest.isWrite || isWrite;
			return;
		}
	}
	// first entry or a later instruction -> append
	locations.emplace_back(index, isRead, isWrite);
}
|
||||
|
||||
// Cost of a single register load or store inside the given segment.
// Computed as ((loopDepth + 1) * 5)^2, i.e. 25, 100, 225, 400, ... for loop depth 0, 1, 2, 3, ...
sint32 PPCRecRARange_getReadWriteCost(PPCRecImlSegment_t* imlSegment)
{
	const sint32 depthPlusOne = imlSegment->loopDepth + 1;
	const sint32 side = depthPlusOne * 5;
	return side * side;
}
|
||||
|
||||
// calculate cost of entire range
|
||||
// ignores data flow and does not detect avoidable reads/stores
|
||||
sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range)
|
||||
{
|
||||
sint32 cost = 0;
|
||||
|
||||
// todo - this algorithm isn't accurate. If we have 10 parallel branches with a load each then the actual cost is still only that of one branch (plus minimal extra cost for generating more code).
|
||||
|
||||
// currently we calculate the cost based on the most expensive entry/exit point
|
||||
|
||||
sint32 mostExpensiveRead = 0;
|
||||
sint32 mostExpensiveWrite = 0;
|
||||
sint32 readCount = 0;
|
||||
sint32 writeCount = 0;
|
||||
|
||||
for (auto& subrange : range->list_subranges)
|
||||
{
|
||||
if (subrange->start.index != RA_INTER_RANGE_START)
|
||||
{
|
||||
//cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment);
|
||||
mostExpensiveRead = std::max(mostExpensiveRead, PPCRecRARange_getReadWriteCost(subrange->imlSegment));
|
||||
readCount++;
|
||||
}
|
||||
if (subrange->end.index != RA_INTER_RANGE_END)
|
||||
{
|
||||
//cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment);
|
||||
mostExpensiveWrite = std::max(mostExpensiveWrite, PPCRecRARange_getReadWriteCost(subrange->imlSegment));
|
||||
writeCount++;
|
||||
}
|
||||
}
|
||||
cost = mostExpensiveRead + mostExpensiveWrite;
|
||||
cost = cost + (readCount + writeCount) / 10;
|
||||
return cost;
|
||||
}
|
||||
|
||||
// calculate cost of range that it would have after calling PPCRecRA_explodeRange() on it
|
||||
sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t* range)
|
||||
{
|
||||
sint32 cost = -PPCRecRARange_estimateCost(range);
|
||||
for (auto& subrange : range->list_subranges)
|
||||
{
|
||||
if (subrange->list_locations.empty())
|
||||
continue;
|
||||
cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment) * 2; // we assume a read and a store
|
||||
}
|
||||
return cost;
|
||||
}
|
||||
|
||||
// Estimates the extra cost caused by splitting a subrange at splitIndex.
// Returns 0 when the split point does not separate any locations (it lies at or in front of the
// first location, or behind the last one). Otherwise the cost of one read plus one store in the
// subrange's segment is returned.
sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessSubrange_t* subrange, sint32 splitIndex)
{
#ifndef PUBLIC_RELEASE
	// validation: the subrange must not extend past the end of its segment
	if (subrange->end.index == RA_INTER_RANGE_END)
		assert_dbg();
#endif
	const auto& locations = subrange->list_locations;
	if (locations.empty())
	{
		assert_dbg(); // should not happen?
		return 0;
	}
	const bool separatesLocations = (splitIndex > locations.front().index) && (splitIndex <= locations.back().index);
	if (!separatesLocations)
		return 0;
	// todo - determine exact cost of split subranges
	// currently we assume that the additional region will require a read and a store
	return PPCRecRARange_getReadWriteCost(subrange->imlSegment) * 2;
}
|
27
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.h
Normal file
27
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.h
Normal file
|
@ -0,0 +1,27 @@
|
|||
#pragma once
|
||||
|
||||
raLivenessRange_t* PPCRecRA_createRangeBase(ppcImlGenContext_t* ppcImlGenContext, uint32 virtualRegister, uint32 name);
|
||||
raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, PPCRecImlSegment_t* imlSegment, sint32 startIndex, sint32 endIndex);
|
||||
void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange);
|
||||
void PPCRecRA_deleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range);
|
||||
void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext);
|
||||
|
||||
void PPCRecRA_mergeRanges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, raLivenessRange_t* absorbedRange);
|
||||
void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range);
|
||||
|
||||
void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, raLivenessSubrange_t* absorbedSubrange);
|
||||
|
||||
raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, sint32 splitIndex, bool trimToHole = false);
|
||||
|
||||
void PPCRecRA_updateOrAddSubrangeLocation(raLivenessSubrange_t* subrange, sint32 index, bool isRead, bool isWrite);
|
||||
void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange);
|
||||
|
||||
// cost estimation
|
||||
sint32 PPCRecRARange_getReadWriteCost(PPCRecImlSegment_t* imlSegment);
|
||||
sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range);
|
||||
sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t* range);
|
||||
sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessSubrange_t* subrange, sint32 splitIndex);
|
||||
|
||||
// special values to mark the index of ranges that reach across the segment border
|
||||
#define RA_INTER_RANGE_START (-1)
|
||||
#define RA_INTER_RANGE_END (0x70000000)
|
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,414 @@
|
|||
#include "PPCRecompiler.h"
|
||||
#include "PPCRecompilerIml.h"
|
||||
#include "PPCRecompilerX64.h"
|
||||
#include "PPCRecompilerImlRanges.h"
|
||||
#include <queue>
|
||||
|
||||
// Returns true if a usage range has been recorded for vGPR in this segment,
// i.e. usageStart no longer holds the INT_MAX "unset" marker.
bool _isRangeDefined(PPCRecImlSegment_t* imlSegment, sint32 vGPR)
{
	const auto& usage = imlSegment->raDistances.reg[vGPR];
	return usage.usageStart != INT_MAX;
}
|
||||
|
||||
// Determines, for every virtual GPR, the first and last instruction index at which it is
// accessed inside the segment. Unused registers keep usageStart == INT_MAX / usageEnd == INT_MIN.
// Scanning stops at the first suffix instruction.
void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
	auto& distances = imlSegment->raDistances;
	// reset all entries to "unused"
	for (sint32 r = 0; r < PPC_REC_MAX_VIRTUAL_GPR; r++)
	{
		distances.reg[r].usageStart = INT_MAX;
		distances.reg[r].usageEnd = INT_MIN;
	}
	PPCImlOptimizerUsedRegisters_t usedRegs;
	for (sint32 instIndex = 0; instIndex < imlSegment->imlListCount; instIndex++)
	{
		auto* instruction = imlSegment->imlList + instIndex;
		if (PPCRecompiler_isSuffixInstruction(instruction))
			break;
		// query the GPRs accessed by this instruction
		PPCRecompiler_checkRegisterUsage(NULL, instruction, &usedRegs);
		for (sint32 slot = 0; slot < 4; slot++)
		{
			const sint32 vReg = usedRegs.gpr[slot];
			if (vReg < 0)
				continue; // slot not in use
			cemu_assert_debug(vReg < PPC_REC_MAX_VIRTUAL_GPR);
			auto& usage = distances.reg[vReg];
			usage.usageStart = std::min(usage.usageStart, instIndex); // index before/at instruction
			usage.usageEnd = std::max(usage.usageEnd, instIndex + 1); // index after instruction
		}
	}
}
|
||||
|
||||
// Runs PPCRecRA_calculateSegmentMinMaxRanges() on every segment of the function, which
// records the min/max usage index of each register per segment.
void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext)
{
	sint32 segIdx = 0;
	while (segIdx < ppcImlGenContext->segmentListCount)
	{
		PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext, ppcImlGenContext->segmentList[segIdx]);
		segIdx++;
	}
}
|
||||
|
||||
// Creates the subrange of 'range' for vGPR inside imlSegment and recursively does the same for
// all neighbouring segments the usage range flows into (forward via the branch links, backward
// via list_prevSegments).
// Returns the subrange for this segment. If the segment was already processed, the stored
// per-register subrange is returned; nullptr is returned when vGPR has no usage range here.
raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR, raLivenessRange_t* range)
{
	auto& distances = imlSegment->raDistances;
	if (distances.isProcessed[vGPR])
		return imlSegment->raInfo.linkedList_perVirtualGPR[vGPR]; // already handled
	// mark before recursing so that cycles in the segment graph terminate
	distances.isProcessed[vGPR] = true;
	if (!_isRangeDefined(imlSegment, vGPR))
		return nullptr;
	cemu_assert_debug(imlSegment->raInfo.linkedList_perVirtualGPR[vGPR] == nullptr);
	const auto& usage = distances.reg[vGPR];
	raLivenessSubrange_t* created = PPCRecRA_createSubrange(ppcImlGenContext, range, imlSegment, usage.usageStart, usage.usageEnd);
	// forward: the range leaves through the end of this segment
	if (usage.usageEnd == RA_INTER_RANGE_END)
	{
		auto* takenSegment = imlSegment->nextSegmentBranchTaken;
		if (takenSegment && takenSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START)
		{
			created->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, takenSegment, vGPR, range);
			cemu_assert_debug(created->subrangeBranchTaken->start.index == RA_INTER_RANGE_START);
		}
		auto* notTakenSegment = imlSegment->nextSegmentBranchNotTaken;
		if (notTakenSegment && notTakenSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START)
		{
			created->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, notTakenSegment, vGPR, range);
			cemu_assert_debug(created->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START);
		}
	}
	// backward: the range enters through the beginning of this segment
	if (usage.usageStart == RA_INTER_RANGE_START)
	{
		for (auto& predecessor : imlSegment->list_prevSegments)
		{
			if (predecessor->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END)
				PPCRecRA_convertToMappedRanges(ppcImlGenContext, predecessor, vGPR, range);
		}
	}
	return created;
}
|
||||
|
||||
// Creates liveness ranges for all registers used in the segment that are not yet part of a
// range, then fills the location lists of the segment's subranges from its instructions.
void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
	// start a new range for every used register that has not been processed yet
	for (sint32 vReg = 0; vReg < PPC_REC_MAX_VIRTUAL_GPR; vReg++)
	{
		const bool needsRange = _isRangeDefined(imlSegment, vReg) && !imlSegment->raDistances.isProcessed[vReg];
		if (!needsRange)
			continue;
		raLivenessRange_t* freshRange = PPCRecRA_createRangeBase(ppcImlGenContext, vReg, ppcImlGenContext->mappedRegister[vReg]);
		PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment, vReg, freshRange);
	}
	// register -> subrange lookup table for this segment
	raLivenessSubrange_t* subrangeOfReg[PPC_REC_MAX_VIRTUAL_GPR];
	for (sint32 vReg = 0; vReg < PPC_REC_MAX_VIRTUAL_GPR; vReg++)
	{
		raLivenessSubrange_t* head = imlSegment->raInfo.linkedList_perVirtualGPR[vReg];
		subrangeOfReg[vReg] = head;
#ifndef PUBLIC_RELEASE
		// at this stage there must be at most one subrange per register and segment
		if (head && head->link_sameVirtualRegisterGPR.next != nullptr)
			assert_dbg();
#endif
	}
	// walk the instructions (up to the first suffix instruction) and record each register access
	PPCImlOptimizerUsedRegisters_t usedRegs;
	for (sint32 instIndex = 0; instIndex < imlSegment->imlListCount; instIndex++)
	{
		auto* instruction = imlSegment->imlList + instIndex;
		if (PPCRecompiler_isSuffixInstruction(instruction))
			break;
		PPCRecompiler_checkRegisterUsage(NULL, instruction, &usedRegs);
		for (sint32 slot = 0; slot < 4; slot++)
		{
			const sint32 vReg = usedRegs.gpr[slot];
			if (vReg < 0)
				continue;
			// slot 3 is recorded as a write, slots 0-2 as reads
			const bool writesReg = (slot == 3);
			raLivenessSubrange_t* target = subrangeOfReg[vReg];
			PPCRecRA_updateOrAddSubrangeLocation(target, instIndex, writesReg == false, writesReg);
#ifndef PUBLIC_RELEASE
			// the access must lie within the bounds of the subrange
			if (instIndex < target->start.index)
				assert_dbg();
			if (instIndex + 1 > target->end.index)
				assert_dbg();
#endif
		}
	}
}
|
||||
|
||||
// Makes the usage range of vGPR reach the end of the segment.
// If the register had no usage range in this segment yet, both start and end are set to
// RA_INTER_RANGE_END; otherwise only the end is moved.
void PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR)
{
	auto& usage = imlSegment->raDistances.reg[vGPR];
	if (!_isRangeDefined(imlSegment, vGPR))
		usage.usageStart = RA_INTER_RANGE_END;
	usage.usageEnd = RA_INTER_RANGE_END;
}
|
||||
|
||||
// Makes the usage range of vGPR start at the beginning of the segment and extends the range
// to the end of every predecessor segment.
// If the register had no usage range in this segment yet, its end is set to
// RA_INTER_RANGE_START as well.
void PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR)
{
	auto& usage = imlSegment->raDistances.reg[vGPR];
	const bool hadRange = _isRangeDefined(imlSegment, vGPR);
	usage.usageStart = RA_INTER_RANGE_START;
	if (!hadRange)
		usage.usageEnd = RA_INTER_RANGE_START;
	// propagate backwards
	for (auto& predecessor : imlSegment->list_prevSegments)
		PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, predecessor, vGPR);
}
|
||||
|
||||
// Connects the usage ranges of vGPR along a route of segments.
// route[0] is extended to its end, route[routeDepth-1] to its beginning, and every segment in
// between is extended in both directions. routeDepth is expected to be at least 2.
void _PPCRecRA_connectRanges(ppcImlGenContext_t* ppcImlGenContext, sint32 vGPR, PPCRecImlSegment_t** route, sint32 routeDepth)
{
#ifndef PUBLIC_RELEASE
	if (routeDepth < 2)
		assert_dbg();
#endif
	const sint32 lastHop = routeDepth - 1;
	// first segment: range has to reach the end
	PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[0], vGPR);
	// intermediate segments: range spans the whole segment
	for (sint32 hop = 1; hop < lastHop; hop++)
	{
		PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[hop], vGPR);
		PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[hop], vGPR);
	}
	// last segment: range has to start at the beginning
	PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[lastHop], vGPR);
}
|
||||
|
||||
// Recursive helper of PPCRecRA_checkAndTryExtendRange().
// Follows the control flow from currentSegment while distanceLeft (in instructions) permits and
// looks for another usage range of vGPR. The segments visited so far are stored in 'route'
// (routeDepth = number of entries already filled in). When a range is found within reach, all
// segments on the route are connected. The search gives up, with a log message, once the route
// would exceed 64 entries.
void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR, sint32 distanceLeft, PPCRecImlSegment_t** route, sint32 routeDepth)
{
	if (routeDepth >= 64)
	{
		forceLogDebug_printf("Recompiler RA route maximum depth exceeded for function 0x%08x\n", ppcImlGenContext->functionRef->ppcAddress);
		return;
	}
	route[routeDepth] = currentSegment;
	const sint32 usageStart = currentSegment->raDistances.reg[vGPR].usageStart;
	if (usageStart == INT_MAX)
	{
		// register unused in this segment: pass through it if the remaining distance allows
		distanceLeft -= currentSegment->imlListCount;
		if (distanceLeft <= 0)
			return;
		if (currentSegment->nextSegmentBranchNotTaken)
			_PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, distanceLeft, route, routeDepth + 1);
		if (currentSegment->nextSegmentBranchTaken)
			_PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, distanceLeft, route, routeDepth + 1);
		return;
	}
	// register is used here: check whether the range is close enough
	if (usageStart == RA_INTER_RANGE_END)
	{
		if (distanceLeft < currentSegment->imlListCount)
			return; // range too far away
	}
	else if (usageStart != RA_INTER_RANGE_START && usageStart > distanceLeft)
	{
		return; // out of range
	}
	// found close range -> connect ranges
	_PPCRecRA_connectRanges(ppcImlGenContext, vGPR, route, routeDepth + 1);
}
|
||||
|
||||
// Tries to connect the usage range of vGPR in currentSegment with a nearby usage range of the
// same register in one of the following segments. The search covers at most 45 instructions,
// counted from the end of the register's usage in currentSegment.
void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR)
{
	constexpr sint32 maxScanDistance = 45;
	const sint32 usageEnd = currentSegment->raDistances.reg[vGPR].usageEnd;
#ifndef PUBLIC_RELEASE
	if (usageEnd < 0)
		assert_dbg();
#endif
	if (usageEnd == RA_INTER_RANGE_START)
		assert_dbg();
	// count instructions between the end of the usage range and the end of this segment
	sint32 instructionsUntilEndOfSeg = 0;
	if (usageEnd != RA_INTER_RANGE_END)
		instructionsUntilEndOfSeg = currentSegment->imlListCount - usageEnd;
#ifndef PUBLIC_RELEASE
	if (instructionsUntilEndOfSeg < 0)
		assert_dbg();
#endif
	const sint32 remainingScanDist = maxScanDistance - instructionsUntilEndOfSeg;
	if (remainingScanDist <= 0)
		return; // can't reach end
	// note (from the original author, comment appears truncated): extending is easier if
	// 'non-symmetric' branches are allowed, e.g. a register range that enters only one branch
	PPCRecImlSegment_t* route[64];
	route[0] = currentSegment;
	if (currentSegment->nextSegmentBranchNotTaken)
		_PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, remainingScanDist, route, 1);
	if (currentSegment->nextSegmentBranchTaken)
		_PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, remainingScanDist, route, 1);
}
|
||||
|
||||
// For every register used in the segment, tries to connect its range with close ranges in the
// following segments. In non-release builds two segment invariants are checked afterwards.
void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
	// todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries
	for (sint32 vReg = 0; vReg < PPC_REC_MAX_VIRTUAL_GPR; vReg++)
	{
		const bool isUsed = (imlSegment->raDistances.reg[vReg].usageStart != INT_MAX);
		if (isUsed)
			PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, imlSegment, vReg); // check and extend if possible
	}
#ifndef PUBLIC_RELEASE
	// an enterable segment must not have predecessors
	const bool hasPredecessor = (imlSegment->list_prevSegments.empty() == false);
	if (hasPredecessor && imlSegment->isEnterable)
		assert_dbg();
	// a segment with a known successor must not be flagged as having an uncertain successor
	const bool hasSuccessor = (imlSegment->nextSegmentBranchNotTaken != nullptr || imlSegment->nextSegmentBranchTaken != nullptr);
	if (hasSuccessor && imlSegment->nextSegmentIsUncertain)
		assert_dbg();
#endif
}
|
||||
|
||||
// Visits imlSegment and every segment reachable from it through the branch links (each one
// once, tracked via raRangeExtendProcessed) and runs PPCRecRA_mergeCloseRangesForSegmentV2()
// on each. Segments are processed in the order in which they were discovered.
void PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
	std::vector<PPCRecImlSegment_t*> worklist;
	worklist.reserve(1000);
	// queues a successor unless there is none or it has been queued before
	auto enqueueIfNew = [&worklist](PPCRecImlSegment_t* successor)
	{
		if (successor && successor->raRangeExtendProcessed == false)
		{
			successor->raRangeExtendProcessed = true;
			worklist.push_back(successor);
		}
	};
	imlSegment->raRangeExtendProcessed = true;
	worklist.push_back(imlSegment);
	// the list grows while it is traversed, therefore iterate by position
	for (size_t cursor = 0; cursor < worklist.size(); cursor++)
	{
		PPCRecImlSegment_t* current = worklist[cursor];
		PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext, current);
		// follow flow
		enqueueIfNew(current->nextSegmentBranchNotTaken);
		enqueueIfNew(current->nextSegmentBranchTaken);
	}
}
|
||||
|
||||
// Starts a flow traversal (PPCRecRA_followFlowAndExtendRanges) at every segment that has no
// predecessors.
void PPCRecRA_mergeCloseRangesV2(ppcImlGenContext_t* ppcImlGenContext)
{
	for (sint32 segIdx = 0; segIdx < ppcImlGenContext->segmentListCount; segIdx++)
	{
		PPCRecImlSegment_t* root = ppcImlGenContext->segmentList[segIdx];
		if (!root->list_prevSegments.empty())
			continue; // reached via another segment, not a traversal root
		if (root->raRangeExtendProcessed)
			assert_dbg(); // should not happen
		PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext, root);
	}
}
|
||||
|
||||
// For every segment inside a loop (loopDepth > 0) that has at least one successor with a lower
// loop depth: all register ranges that reach the end of the segment are extended to the
// beginning of each successor. This allows the data flow analyzer to move stores out of the loop.
void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext)
{
	for (sint32 segIdx = 0; segIdx < ppcImlGenContext->segmentListCount; segIdx++)
	{
		PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[segIdx];
		const auto ownDepth = imlSegment->loopDepth;
		if (ownDepth <= 0)
			continue; // not inside a loop
		auto* takenSegment = imlSegment->nextSegmentBranchTaken;
		auto* notTakenSegment = imlSegment->nextSegmentBranchNotTaken;
		// look for loop exit
		const bool exitsViaTaken = takenSegment && takenSegment->loopDepth < ownDepth;
		const bool exitsViaNotTaken = notTakenSegment && notTakenSegment->loopDepth < ownDepth;
		if (!exitsViaTaken && !exitsViaNotTaken)
			continue;
		// extend looping ranges into all exits
		// todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries
		for (sint32 vReg = 0; vReg < PPC_REC_MAX_VIRTUAL_GPR; vReg++)
		{
			if (imlSegment->raDistances.reg[vReg].usageEnd != RA_INTER_RANGE_END)
				continue; // range not set or does not reach end of segment
			if (takenSegment)
				PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, takenSegment, vReg);
			if (notTakenSegment)
				PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, notTakenSegment, vReg);
		}
	}
}
|
||||
|
||||
// Runs the flow-dependent part of liveness analysis in three steps:
// 1. merge usage ranges that are close to each other
// 2. extend ranges out of loops (so register stores can be moved out of loops)
// 3. create the liveness ranges of every segment
void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext)
{
	PPCRecRA_mergeCloseRangesV2(ppcImlGenContext);
	PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext);
	sint32 segIdx = 0;
	while (segIdx < ppcImlGenContext->segmentListCount)
	{
		PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext, ppcImlGenContext->segmentList[segIdx]);
		segIdx++;
	}
}
|
||||
|
||||
// Derives the load/store flags of a single subrange from its location list:
//   hasStore -> at least one location writes the register
//   _noLoad  -> a write occurs before any read has been seen, or the subrange starts at the
//               segment border (RA_INTER_RANGE_START)
void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange)
{
	bool sawRead = false;
	bool sawWrite = false;
	bool writtenBeforeRead = false;
	for (auto& location : subrange->list_locations)
	{
		// the read flag is evaluated first, so a location that both reads and writes counts as a read
		if (location.isRead)
			sawRead = true;
		if (!location.isWrite)
			continue;
		if (sawRead == false)
			writtenBeforeRead = true;
		sawWrite = true;
	}
	subrange->_noLoad = writtenBeforeRead;
	subrange->hasStore = sawWrite;
	if (subrange->start.index == RA_INTER_RANGE_START)
		subrange->_noLoad = true;
}
|
||||
|
||||
void _analyzeRangeDataFlow(raLivenessSubrange_t* subrange);
|
||||
|
||||
// Data flow analysis over all ranges.
// This function is called after _assignRegisters(), which means that all ranges are already
// final and won't change anymore.
// Pass 1 evaluates every subrange on its own, pass 2 scans along the subrange flow. The two
// passes are kept separate: pass 1 finishes for all subranges before pass 2 starts.
void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext)
{
	auto& allRanges = ppcImlGenContext->raInfo.list_ranges;
	// pass 1: per-subrange flags
	for (auto& currentRange : allRanges)
	{
		for (auto& piece : currentRange->list_subranges)
			PPCRecRA_analyzeSubrangeDataDependencyV2(piece);
	}
	// pass 2: follow the flow between subranges
	for (auto& currentRange : allRanges)
	{
		// todo - traversing this backwards should be faster and yield better results due to the nature of the algorithm
		for (auto& piece : currentRange->list_subranges)
			_analyzeRangeDataFlow(piece);
	}
}
|
173
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp
Normal file
173
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp
Normal file
|
@ -0,0 +1,173 @@
|
|||
#include "PPCRecompiler.h"
|
||||
#include "PPCRecompilerIml.h"
|
||||
|
||||
// Returns the first segment that is flagged as jump destination for the PPC address ppcOffset.
// If there is none, a debug message is printed and NULL is returned.
PPCRecImlSegment_t* PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcOffset)
{
	for (sint32 segIdx = 0; segIdx < ppcImlGenContext->segmentListCount; segIdx++)
	{
		PPCRecImlSegment_t* candidate = ppcImlGenContext->segmentList[segIdx];
		if (!candidate->isJumpDestination)
			continue;
		if (candidate->jumpDestinationPPCAddress == ppcOffset)
			return candidate;
	}
	debug_printf("PPCRecompiler_getSegmentByPPCJumpAddress(): Unable to find segment (ppcOffset 0x%08x)\n", ppcOffset);
	return NULL;
}
|
||||
|
||||
// Links imlSegmentDst as the branch-not-taken successor of imlSegmentSrc and registers
// imlSegmentSrc as predecessor of imlSegmentDst.
// Nothing happens if the link already has the requested value (this includes the case where
// both the current link and imlSegmentDst are null). Replacing an existing, different link
// triggers a debug assert.
void PPCRecompilerIml_setLinkBranchNotTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst)
{
	auto& link = imlSegmentSrc->nextSegmentBranchNotTaken;
	if (link == imlSegmentDst)
		return; // already linked
	if (link != NULL)
		assert_dbg();
	// forward link
	link = imlSegmentDst;
	// backward link
	imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc);
}
|
||||
|
||||
// Links imlSegmentDst as the branch-taken successor of imlSegmentSrc and registers
// imlSegmentSrc as predecessor of imlSegmentDst.
// Nothing happens if the link already has the requested value (this includes the case where
// both the current link and imlSegmentDst are null). Replacing an existing, different link
// triggers a debug assert.
void PPCRecompilerIml_setLinkBranchTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst)
{
	auto& link = imlSegmentSrc->nextSegmentBranchTaken;
	if (link == imlSegmentDst)
		return; // already linked
	if (link != NULL)
		assert_dbg();
	// forward link
	link = imlSegmentDst;
	// backward link
	imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc);
}
|
||||
|
||||
// Removes the link from imlSegmentSrc to imlSegmentDst.
// The matching successor pointer of the source is cleared (not-taken is checked first) and one
// occurrence of the source is removed from the destination's predecessor list. A debug assert
// fires if either side of the link does not exist.
void PPCRecompilerIML_removeLink(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst)
{
	// forward link
	if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst)
		imlSegmentSrc->nextSegmentBranchNotTaken = NULL;
	else if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst)
		imlSegmentSrc->nextSegmentBranchTaken = NULL;
	else
		assert_dbg();
	// backward link: erase the first matching predecessor entry
	auto& predecessors = imlSegmentDst->list_prevSegments;
	auto pos = predecessors.begin();
	while (pos != predecessors.end() && !(*pos == imlSegmentSrc))
		++pos;
	if (pos == predecessors.end())
	{
		assert_dbg();
		return;
	}
	predecessors.erase(pos);
}
|
||||
|
||||
/*
|
||||
* Replaces all links to segment orig with linkts to segment new
|
||||
*/
|
||||
void PPCRecompilerIML_relinkInputSegment(PPCRecImlSegment_t* imlSegmentOrig, PPCRecImlSegment_t* imlSegmentNew)
|
||||
{
|
||||
while (imlSegmentOrig->list_prevSegments.size() != 0)
|
||||
{
|
||||
PPCRecImlSegment_t* prevSegment = imlSegmentOrig->list_prevSegments[0];
|
||||
if (prevSegment->nextSegmentBranchNotTaken == imlSegmentOrig)
|
||||
{
|
||||
PPCRecompilerIML_removeLink(prevSegment, imlSegmentOrig);
|
||||
PPCRecompilerIml_setLinkBranchNotTaken(prevSegment, imlSegmentNew);
|
||||
}
|
||||
else if (prevSegment->nextSegmentBranchTaken == imlSegmentOrig)
|
||||
{
|
||||
PPCRecompilerIML_removeLink(prevSegment, imlSegmentOrig);
|
||||
PPCRecompilerIml_setLinkBranchTaken(prevSegment, imlSegmentNew);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert_dbg();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Establishes the successor/predecessor links between all segments, based on the last
// instruction of each segment:
//   - empty segment: falls through to the next segment (uncertain if it is the last one)
//   - conditional jump (incl. cycle check variant): branch-taken link to the jump destination,
//     plus a branch-not-taken link to the next segment unless the condition is
//     PPCREC_JUMP_CONDITION_NONE; uncertain if the destination segment cannot be found
//   - macro: successor is treated as unknown
//   - anything else: falls through to the next segment
void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext)
{
	for (sint32 segIdx = 0; segIdx < ppcImlGenContext->segmentListCount; segIdx++)
	{
		PPCRecImlSegment_t* current = ppcImlGenContext->segmentList[segIdx];
		const bool isFinal = (segIdx + 1) >= ppcImlGenContext->segmentListCount;
		PPCRecImlSegment_t* follower = isFinal ? NULL : ppcImlGenContext->segmentList[segIdx + 1];
		// handle empty segment
		if (current->imlListCount == 0)
		{
			if (isFinal)
				current->nextSegmentIsUncertain = true;
			else
				PPCRecompilerIml_setLinkBranchNotTaken(current, follower); // continue execution to next segment
			continue;
		}
		// the last instruction decides where execution continues
		PPCRecImlInstruction_t* lastInstruction = current->imlList + (current->imlListCount - 1);
		const bool endsWithJump = (lastInstruction->type == PPCREC_IML_TYPE_CJUMP || lastInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK);
		if (endsWithJump)
		{
			// find destination segment by ppc jump address
			PPCRecImlSegment_t* jumpTarget = PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, lastInstruction->op_conditionalJump.jumpmarkAddress);
			if (!jumpTarget)
			{
				current->nextSegmentIsUncertain = true;
				continue;
			}
			if (lastInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE)
				PPCRecompilerIml_setLinkBranchNotTaken(current, follower);
			PPCRecompilerIml_setLinkBranchTaken(current, jumpTarget);
		}
		else if (lastInstruction->type == PPCREC_IML_TYPE_MACRO)
		{
			// currently we assume that the next segment is unknown for all macros
			current->nextSegmentIsUncertain = true;
		}
		else
		{
			// all other instruction types do not branch
			PPCRecompilerIml_setLinkBranchNotTaken(current, follower);
		}
	}
}
|
||||
|
||||
// Ensures that no enterable segment has predecessors.
// For every segment that is enterable and also reachable from other segments, a new entry
// segment is appended to the segment list. The entry segment takes over the enterable flag and
// enter address, contains a single jump-segment instruction and is linked (branch taken) to
// the original segment, which then loses its enterable flag.
void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext)
{
	// Only the segments present on entry are examined. The entry segments appended below are
	// never linked to as a successor by this function, so they need no isolation themselves.
	const sint32 initialSegmentCount = ppcImlGenContext->segmentListCount;
	for (sint32 i = 0; i < initialSegmentCount; i++)
	{
		PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[i];
		if (imlSegment->list_prevSegments.empty() || !imlSegment->isEnterable)
			continue;
		// spawn new segment at end
		PPCRecompilerIml_insertSegments(ppcImlGenContext, ppcImlGenContext->segmentListCount, 1);
		PPCRecImlSegment_t* entrySegment = ppcImlGenContext->segmentList[ppcImlGenContext->segmentListCount - 1];
		entrySegment->isEnterable = true;
		entrySegment->enterPPCAddress = imlSegment->enterPPCAddress;
		// create jump instruction
		PPCRecompiler_pushBackIMLInstructions(entrySegment, 0, 1);
		PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, entrySegment->imlList + 0);
		PPCRecompilerIml_setLinkBranchTaken(entrySegment, imlSegment);
		// remove enterable flag from original segment
		imlSegment->isEnterable = false;
		imlSegment->enterPPCAddress = 0;
	}
}
|
||||
|
||||
// Returns a pointer to the final instruction stored in imlSegment->imlList,
// or nullptr when the segment holds no instructions (imlListCount == 0).
PPCRecImlInstruction_t* PPCRecompilerIML_getLastInstruction(PPCRecImlSegment_t* imlSegment)
{
	return (imlSegment->imlListCount == 0)
		? nullptr
		: &imlSegment->imlList[imlSegment->imlListCount - 1];
}
|
2682
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp
Normal file
2682
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp
Normal file
File diff suppressed because it is too large
Load diff
332
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h
Normal file
332
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h
Normal file
|
@ -0,0 +1,332 @@
|
|||
|
||||
// One entry of the relocation table referenced by x64GenContext_t::relocateOffsetTable.
typedef struct
|
||||
{
|
||||
uint32 offset; // NOTE(review): presumably a byte offset into the generated code buffer - confirm
|
||||
uint8 type; // NOTE(review): presumably one of the X86_RELOC_* / X64_RELOC_* constants defined in this header - confirm
|
||||
void* extraInfo; // opaque, type-dependent payload; its meaning is not visible in this header
|
||||
}x64RelocEntry_t;
|
||||
|
||||
// State shared by all x64 emitter functions (passed as first argument to every x64Gen_* / x64Emit_* function).
typedef struct
|
||||
{
|
||||
uint8* codeBuffer; // output buffer for generated machine code
|
||||
sint32 codeBufferIndex; // NOTE(review): presumably the current write position within codeBuffer - confirm
|
||||
sint32 codeBufferSize; // NOTE(review): presumably the allocated size of codeBuffer in bytes - confirm
|
||||
// cr state
|
||||
sint32 activeCRRegister; // current x86 condition flags reflect this cr* register
|
||||
sint32 activeCRState; // describes the way in which x86 flags map to the cr register (signed / unsigned)
|
||||
// relocate offsets
|
||||
x64RelocEntry_t* relocateOffsetTable; // array of relocation entries
|
||||
sint32 relocateOffsetTableSize; // NOTE(review): presumably the allocated capacity of relocateOffsetTable - confirm
|
||||
sint32 relocateOffsetTableCount; // NOTE(review): presumably the number of entries currently in use - confirm
|
||||
}x64GenContext_t;
|
||||
|
||||
// Some of these are defined by winnt.h and gnu headers
|
||||
#undef REG_EAX
|
||||
#undef REG_ECX
|
||||
#undef REG_EDX
|
||||
#undef REG_EBX
|
||||
#undef REG_ESP
|
||||
#undef REG_EBP
|
||||
#undef REG_ESI
|
||||
#undef REG_EDI
|
||||
#undef REG_NONE
|
||||
#undef REG_RAX
|
||||
#undef REG_RCX
|
||||
#undef REG_RDX
|
||||
#undef REG_RBX
|
||||
#undef REG_RSP
|
||||
#undef REG_RBP
|
||||
#undef REG_RSI
|
||||
#undef REG_RDI
|
||||
#undef REG_R8
|
||||
#undef REG_R9
|
||||
#undef REG_R10
|
||||
#undef REG_R11
|
||||
#undef REG_R12
|
||||
#undef REG_R13
|
||||
#undef REG_R14
|
||||
#undef REG_R15
|
||||
|
||||
#define REG_EAX 0
|
||||
#define REG_ECX 1
|
||||
#define REG_EDX 2
|
||||
#define REG_EBX 3
|
||||
#define REG_ESP 4 // reserved for low half of hCPU pointer
|
||||
#define REG_EBP 5
|
||||
#define REG_ESI 6
|
||||
#define REG_EDI 7
|
||||
#define REG_NONE -1
|
||||
|
||||
#define REG_RAX 0
|
||||
#define REG_RCX 1
|
||||
#define REG_RDX 2
|
||||
#define REG_RBX 3
|
||||
#define REG_RSP 4 // reserved for hCPU pointer
|
||||
#define REG_RBP 5
|
||||
#define REG_RSI 6
|
||||
#define REG_RDI 7
|
||||
#define REG_R8 8
|
||||
#define REG_R9 9
|
||||
#define REG_R10 10
|
||||
#define REG_R11 11
|
||||
#define REG_R12 12
|
||||
#define REG_R13 13 // reserved to hold pointer to memory base? (Not decided yet)
|
||||
#define REG_R14 14 // reserved as temporary register
|
||||
#define REG_R15 15 // reserved for pointer to ppcRecompilerInstanceData
|
||||
|
||||
#define REG_AL 0
|
||||
#define REG_CL 1
|
||||
#define REG_DL 2
|
||||
#define REG_BL 3
|
||||
#define REG_AH 4
|
||||
#define REG_CH 5
|
||||
#define REG_DH 6
|
||||
#define REG_BH 7
|
||||
|
||||
// reserved registers
|
||||
#define REG_RESV_TEMP (REG_R14)
|
||||
#define REG_RESV_HCPU (REG_RSP)
|
||||
#define REG_RESV_MEMBASE (REG_R13)
|
||||
#define REG_RESV_RECDATA (REG_R15)
|
||||
|
||||
// reserved floating-point registers
|
||||
#define REG_RESV_FPR_TEMP (15)
|
||||
|
||||
|
||||
extern sint32 x64Gen_registerMap[12];
|
||||
|
||||
#define tempToRealRegister(__x) (x64Gen_registerMap[__x])
|
||||
#define tempToRealFPRRegister(__x) (__x)
|
||||
#define reg32ToReg16(__x) (__x)
|
||||
|
||||
// Condition identifiers passed as 'conditionType' to the conditional emitters declared in this header
// (x64Gen_jmpc_far / x64Gen_jmpc_near / x64Gen_setcc_* / x64Gen_cmovcc_*).
// The enumerators rely on implicit values (0, 1, 2, ...).
// NOTE(review): presumably the emitters switch on or index by these values - confirm before reordering or inserting entries.
enum
|
||||
{
|
||||
X86_CONDITION_EQUAL, // or zero
|
||||
X86_CONDITION_NOT_EQUAL, // or not zero
|
||||
X86_CONDITION_SIGNED_LESS, // or not greater/equal
|
||||
X86_CONDITION_SIGNED_GREATER, // or not less/equal
|
||||
X86_CONDITION_SIGNED_LESS_EQUAL, // or not greater
|
||||
X86_CONDITION_SIGNED_GREATER_EQUAL, // or not less
|
||||
X86_CONDITION_UNSIGNED_BELOW, // or not above/equal
|
||||
X86_CONDITION_UNSIGNED_ABOVE, // or not below/equal
|
||||
X86_CONDITION_UNSIGNED_BELOW_EQUAL, // or not above
|
||||
X86_CONDITION_UNSIGNED_ABOVE_EQUAL, // or not below
|
||||
X86_CONDITION_CARRY, // carry flag must be set
|
||||
X86_CONDITION_NOT_CARRY, // carry flag must not be set
|
||||
X86_CONDITION_SIGN, // sign flag must be set
|
||||
X86_CONDITION_NOT_SIGN, // sign flag must not be set
|
||||
X86_CONDITION_PARITY, // parity flag must be set
|
||||
X86_CONDITION_NONE, // no condition, jump always
|
||||
};
|
||||
|
||||
// Values describing how the x86 condition flags relate to a PPC cr register
// (see the activeCRRegister / activeCRState members of x64GenContext_t).
#define PPCREC_CR_TEMPORARY (8) // never stored
|
||||
#define PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC (0) // NOTE(review): comment previously read "for signed arithmetic operations (ADD, CMPI)", which contradicts the macro name - presumably swapped with the next define; confirm
|
||||
#define PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC (1) // NOTE(review): comment previously read "for unsigned arithmetic operations (ADD, CMPI)", which contradicts the macro name - presumably swapped with the previous define; confirm
|
||||
#define PPCREC_CR_STATE_TYPE_LOGICAL (2) // for unsigned operations (CMPLI)
|
||||
|
||||
#define X86_RELOC_MAKE_RELATIVE (0) // make code imm relative to instruction
|
||||
#define X64_RELOC_LINK_TO_PPC (1) // translate from ppc address to x86 offset
|
||||
#define X64_RELOC_LINK_TO_SEGMENT (2) // link to beginning of segment
|
||||
|
||||
#define PPC_X64_GPR_USABLE_REGISTERS (16-4)
|
||||
#define PPC_X64_FPR_USABLE_REGISTERS (16-1) // Use XMM0 - XMM14, XMM15 is the temp register
|
||||
|
||||
|
||||
bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext);
|
||||
|
||||
void PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext);
|
||||
|
||||
void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset);
|
||||
|
||||
void PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions();
|
||||
|
||||
void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
|
||||
void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
|
||||
bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed);
|
||||
bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed);
|
||||
|
||||
void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
|
||||
void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
|
||||
void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
|
||||
void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
|
||||
|
||||
// ASM gen
|
||||
void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v);
|
||||
void x64Gen_writeU16(x64GenContext_t* x64GenContext, uint32 v);
|
||||
void x64Gen_writeU32(x64GenContext_t* x64GenContext, uint32 v);
|
||||
|
||||
void x64Emit_mov_reg32_mem32(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memOffset);
|
||||
void x64Emit_mov_mem32_reg32(x64GenContext_t* x64GenContext, sint32 memBaseReg64, sint32 memOffset, sint32 srcReg);
|
||||
void x64Emit_mov_mem64_reg64(x64GenContext_t* x64GenContext, sint32 memBaseReg64, sint32 memOffset, sint32 srcReg);
|
||||
void x64Emit_mov_reg64_mem64(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memOffset);
|
||||
void x64Emit_mov_reg64_mem32(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memOffset);
|
||||
void x64Emit_mov_mem32_reg64(x64GenContext_t* x64GenContext, sint32 memBaseReg64, sint32 memOffset, sint32 srcReg);
|
||||
void x64Emit_mov_reg64_mem64(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memIndexReg64, sint32 memOffset);
|
||||
void x64Emit_mov_reg32_mem32(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memIndexReg64, sint32 memOffset);
|
||||
void x64Emit_mov_reg64b_mem8(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memIndexReg64, sint32 memOffset);
|
||||
void x64Emit_movZX_reg32_mem8(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memIndexReg64, sint32 memOffset);
|
||||
void x64Emit_movZX_reg64_mem8(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memOffset);
|
||||
|
||||
void x64Gen_movSignExtend_reg64Low32_mem8Reg64PlusReg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
|
||||
|
||||
void x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
|
||||
void x64Gen_mov_mem64Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
|
||||
void x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
|
||||
void x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
|
||||
void x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
|
||||
void x64Gen_mov_mem32Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 memImmU32, uint32 dataImmU32);
|
||||
void x64Gen_mov_mem64Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 memImmU32, uint32 dataImmU32);
|
||||
void x64Gen_mov_mem8Reg64_imm8(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 memImmU32, uint8 dataImmU8);
|
||||
|
||||
void x64Gen_mov_reg64_imm64(x64GenContext_t* x64GenContext, sint32 destRegister, uint64 immU64);
|
||||
void x64Gen_mov_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 destRegister, uint64 immU32);
|
||||
void x64Gen_mov_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
|
||||
void x64Gen_lea_reg64Low32_reg64Low32PlusReg64Low32(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64);
|
||||
|
||||
void x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, uint32 conditionType, sint32 destRegister, sint32 srcRegister);
|
||||
void x64Gen_mov_reg64_reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
void x64Gen_xchg_reg64_reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
void x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
void x64Gen_movZeroExtend_reg64Low32_reg64Low16(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
void x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
void x64Gen_movZeroExtend_reg64Low32_reg64Low8(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
|
||||
void x64Gen_or_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32);
|
||||
void x64Gen_and_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32);
|
||||
void x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32);
|
||||
|
||||
void x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
|
||||
void x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegister64, sint32 memImmS32, sint32 srcRegister);
|
||||
|
||||
void x64Gen_add_reg64_reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
void x64Gen_add_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
void x64Gen_add_reg64_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
|
||||
void x64Gen_add_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
|
||||
void x64Gen_sub_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
void x64Gen_sub_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
|
||||
void x64Gen_sub_reg64_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
|
||||
void x64Gen_sub_mem32reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegister, sint32 memImmS32, uint64 immU32);
|
||||
void x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
void x64Gen_adc_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
void x64Gen_adc_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
|
||||
void x64Gen_dec_mem32(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint32 memoryImmU32);
|
||||
void x64Gen_imul_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 operandRegister);
|
||||
void x64Gen_idiv_reg64Low32(x64GenContext_t* x64GenContext, sint32 operandRegister);
|
||||
void x64Gen_div_reg64Low32(x64GenContext_t* x64GenContext, sint32 operandRegister);
|
||||
void x64Gen_imul_reg64Low32(x64GenContext_t* x64GenContext, sint32 operandRegister);
|
||||
void x64Gen_mul_reg64Low32(x64GenContext_t* x64GenContext, sint32 operandRegister);
|
||||
void x64Gen_and_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
|
||||
void x64Gen_and_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
void x64Gen_test_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
void x64Gen_test_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
|
||||
void x64Gen_cmp_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, sint32 immS32);
|
||||
void x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
void x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 memRegister, sint32 memImmS32);
|
||||
void x64Gen_or_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
|
||||
void x64Gen_or_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
void x64Gen_xor_reg32_reg32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
void x64Gen_xor_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
void x64Gen_xor_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
|
||||
|
||||
void x64Gen_rol_reg64Low32_imm8(x64GenContext_t* x64GenContext, sint32 srcRegister, sint8 immS8);
|
||||
void x64Gen_rol_reg64Low32_cl(x64GenContext_t* x64GenContext, sint32 srcRegister);
|
||||
void x64Gen_rol_reg64Low16_imm8(x64GenContext_t* x64GenContext, sint32 srcRegister, sint8 immS8);
|
||||
void x64Gen_rol_reg64_imm8(x64GenContext_t* x64GenContext, sint32 srcRegister, sint8 immS8);
|
||||
void x64Gen_shl_reg64Low32_imm8(x64GenContext_t* x64GenContext, sint32 srcRegister, sint8 immS8);
|
||||
void x64Gen_shr_reg64Low32_imm8(x64GenContext_t* x64GenContext, sint32 srcRegister, sint8 immS8);
|
||||
void x64Gen_sar_reg64Low32_imm8(x64GenContext_t* x64GenContext, sint32 srcRegister, sint8 immS8);
|
||||
|
||||
void x64Gen_not_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister);
|
||||
void x64Gen_neg_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister);
|
||||
void x64Gen_cdq(x64GenContext_t* x64GenContext);
|
||||
|
||||
void x64Gen_bswap_reg64(x64GenContext_t* x64GenContext, sint32 destRegister);
|
||||
void x64Gen_bswap_reg64Lower32bit(x64GenContext_t* x64GenContext, sint32 destRegister);
|
||||
void x64Gen_bswap_reg64Lower16bit(x64GenContext_t* x64GenContext, sint32 destRegister);
|
||||
|
||||
void x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
void x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
|
||||
void x64Gen_cmp_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, sint32 immS32);
|
||||
void x64Gen_setcc_mem8(x64GenContext_t* x64GenContext, sint32 conditionType, sint32 memoryRegister, uint32 memoryImmU32);
|
||||
void x64Gen_setcc_reg64b(x64GenContext_t* x64GenContext, sint32 conditionType, sint32 dataRegister);
|
||||
void x64Gen_bt_mem8(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint32 memoryImmU32, uint8 bitIndex);
|
||||
void x64Gen_cmc(x64GenContext_t* x64GenContext);
|
||||
|
||||
void x64Gen_jmp_imm32(x64GenContext_t* x64GenContext, uint32 destImm32);
|
||||
void x64Gen_jmp_memReg64(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 immU32);
|
||||
void x64Gen_jmpc_far(x64GenContext_t* x64GenContext, sint32 conditionType, sint32 relativeDest);
|
||||
void x64Gen_jmpc_near(x64GenContext_t* x64GenContext, sint32 conditionType, sint32 relativeDest);
|
||||
|
||||
void x64Gen_push_reg64(x64GenContext_t* x64GenContext, sint32 srcRegister);
|
||||
void x64Gen_pop_reg64(x64GenContext_t* x64GenContext, sint32 destRegister);
|
||||
void x64Gen_jmp_reg64(x64GenContext_t* x64GenContext, sint32 srcRegister);
|
||||
void x64Gen_call_reg64(x64GenContext_t* x64GenContext, sint32 srcRegister);
|
||||
void x64Gen_ret(x64GenContext_t* x64GenContext);
|
||||
void x64Gen_int3(x64GenContext_t* x64GenContext);
|
||||
|
||||
// floating-point (SIMD/SSE) gen
|
||||
void x64Gen_movaps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSource);
|
||||
void x64Gen_movupd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
|
||||
void x64Gen_movupd_memReg128_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
|
||||
void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
|
||||
void x64Gen_movddup_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_movhlps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_movsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
|
||||
void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
|
||||
void x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc, uint8 imm8);
|
||||
void x64Gen_addsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_addpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_subsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_subpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_mulsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_mulpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_mulpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
|
||||
void x64Gen_divsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_divpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_comisd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 memoryReg, sint32 memImmS32);
|
||||
void x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_comiss_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 memoryReg, sint32 memImmS32);
|
||||
void x64Gen_orps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32);
|
||||
void x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32);
|
||||
void x64Gen_andps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32);
|
||||
void x64Gen_andpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
|
||||
void x64Gen_andps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32);
|
||||
void x64Gen_cvttpd2dq_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_cvtpi2pd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 memReg, sint32 memImmS32);
|
||||
void x64Gen_cvtsd2si_reg64Low_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_sqrtsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_sqrtpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_rcpss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_mulss_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
|
||||
|
||||
void x64Gen_movd_xmmReg_reg64Low32(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc);
|
||||
void x64Gen_movd_reg64Low32_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_movq_xmmReg_reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc);
|
||||
void x64Gen_movq_reg64_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 xmmRegisterSrc);
|
||||
|
||||
// AVX
|
||||
|
||||
void x64Gen_avx_VPUNPCKHQDQ_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB);
|
||||
void x64Gen_avx_VUNPCKHPD_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB);
|
||||
void x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB);
|
||||
|
||||
// BMI
|
||||
void x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
|
||||
void x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
|
||||
|
||||
void x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
|
||||
|
||||
void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
|
||||
void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
|
49
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp
Normal file
49
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp
Normal file
|
@ -0,0 +1,49 @@
|
|||
#include "PPCRecompiler.h"
|
||||
#include "PPCRecompilerX64.h"
|
||||
|
||||
void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
|
||||
|
||||
// Emits a VEX prefix followed by the opcode byte.
//   opcodeMap         - currently ignored; the three-byte form always selects map 1
//   additionalOperand - register number stored (bitwise inverted, low 4 bits) in the vvvv field
//   pp                - value for the two-bit pp field (see VEX_PP_*)
//   vex_ext           - placed verbatim into bit 7 of the byte that carries vvvv/pp
//   vex_r             - if non-zero, bit 7 of the second byte of the three-byte form is cleared
//   vex_b             - if non-zero, the three-byte form (0xC4) is used and bit 5 of its second byte is cleared;
//                       otherwise the two-byte form (0xC5) is emitted
//   opcode            - opcode byte written after the prefix
// The X bit of the three-byte form is always emitted as set (no index register extension).
void _x64Gen_vex128_nds(x64GenContext_t* x64GenContext, uint8 opcodeMap, uint8 additionalOperand, uint8 pp, uint8 vex_ext, uint8 vex_r, uint8 vex_b, uint8 opcode)
{
	if (vex_b == 0)
	{
		// two byte VEX
		x64Gen_writeU8(x64GenContext, 0xC5);
	}
	else
	{
		// three byte VEX: escape byte, then the R/X/B bits together with the map select
		x64Gen_writeU8(x64GenContext, 0xC4);
		x64Gen_writeU8(x64GenContext, (vex_r != 0 ? 0x00 : 0x80) | 0x40 | 0x01);
	}

	// byte shared by both forms: [vex_ext][~vvvv][L=0][pp]
	const uint8 invertedOperand = (~additionalOperand) & 0xF;
	x64Gen_writeU8(x64GenContext, (vex_ext << 7) | (invertedOperand << 3) | pp);

	x64Gen_writeU8(x64GenContext, opcode);
}
|
||||
|
||||
// Values for the two-bit 'pp' field of the VEX prefix (implied legacy SIMD prefix).
// The values marked "guessed" agree with the documented VEX.pp encoding: 0 = none, 1 = 66, 2 = F3, 3 = F2.
#define VEX_PP_0F 0 // guessed
|
||||
#define VEX_PP_66_0F 1
|
||||
#define VEX_PP_F3_0F 2 // guessed
|
||||
#define VEX_PP_F2_0F 3 // guessed
|
||||
|
||||
|
||||
// Emits VPUNPCKHQDQ dstRegister, srcRegisterA, srcRegisterB (VEX encoded, opcode 0x6D, 66 prefix class).
// srcRegisterA goes into the VEX vvvv field, srcRegisterB into ModRM.rm and dstRegister into ModRM.reg.
void x64Gen_avx_VPUNPCKHQDQ_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB)
{
	const bool dstIsHigh = dstRegister >= 8;
	const bool srcBIsHigh = srcRegisterB >= 8;

	_x64Gen_vex128_nds(x64GenContext, 0, srcRegisterA, VEX_PP_66_0F,
		dstIsHigh ? 0 : 1,
		(dstIsHigh && srcBIsHigh) ? 1 : 0,
		srcBIsHigh ? 1 : 0,
		0x6D);

	// ModRM with mod=11 (register direct)
	x64Gen_writeU8(x64GenContext, 0xC0 + (dstRegister & 7) * 8 + (srcRegisterB & 7));
}
|
||||
|
||||
// Emits VUNPCKHPD dstRegister, srcRegisterA, srcRegisterB (VEX encoded, opcode 0x15, 66 prefix class).
// srcRegisterA goes into the VEX vvvv field, srcRegisterB into ModRM.rm and dstRegister into ModRM.reg.
void x64Gen_avx_VUNPCKHPD_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB)
{
	const bool dstIsHigh = dstRegister >= 8;
	const bool srcBIsHigh = srcRegisterB >= 8;

	_x64Gen_vex128_nds(x64GenContext, 0, srcRegisterA, VEX_PP_66_0F,
		dstIsHigh ? 0 : 1,
		(dstIsHigh && srcBIsHigh) ? 1 : 0,
		srcBIsHigh ? 1 : 0,
		0x15);

	// ModRM with mod=11 (register direct)
	x64Gen_writeU8(x64GenContext, 0xC0 + (dstRegister & 7) * 8 + (srcRegisterB & 7));
}
|
||||
|
||||
// Emits VSUBPD dstRegister, srcRegisterA, srcRegisterB (VEX encoded, opcode 0x5C, 66 prefix class).
// srcRegisterA goes into the VEX vvvv field, srcRegisterB into ModRM.rm and dstRegister into ModRM.reg.
void x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB)
{
	const bool dstIsHigh = dstRegister >= 8;
	const bool srcBIsHigh = srcRegisterB >= 8;

	_x64Gen_vex128_nds(x64GenContext, 0, srcRegisterA, VEX_PP_66_0F,
		dstIsHigh ? 0 : 1,
		(dstIsHigh && srcBIsHigh) ? 1 : 0,
		srcBIsHigh ? 1 : 0,
		0x5C);

	// ModRM with mod=11 (register direct)
	x64Gen_writeU8(x64GenContext, 0xC0 + (dstRegister & 7) * 8 + (srcRegisterB & 7));
}
|
80
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp
Normal file
80
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp
Normal file
|
@ -0,0 +1,80 @@
|
|||
#include "PPCRecompiler.h"
|
||||
#include "PPCRecompilerX64.h"
|
||||
|
||||
void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
|
||||
|
||||
// Emits MOVBE <dstReg64> (low dword), DWORD [<memRegisterA64> + <memRegisterB64> + <imm32>]
// Byte sequence: [REX] 0F 38 F0 <ModRM/SIB/displacement written by _x64Gen_writeMODRMDeprecated>
//   dstRegister    - destination register (0-15)
//   memRegisterA64 - first address register; selects REX bit 0x01 when >= 8
//   memRegisterB64 - second address register; selects REX bit 0x02 when >= 8
//   memImmS32      - signed 32bit displacement
void x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32)
{
	// Build the REX prefix from its individual extension bits.
	// The previous if/else ladder emitted 0x42 for (dstRegister >= 8 && memRegisterB64 >= 8),
	// dropping the 0x04 bit of the destination register; the correct value is 0x46.
	uint8 rexPrefix = 0x40;
	if( dstRegister >= 8 )
		rexPrefix |= 0x04; // REX.R - extends the data register
	if( memRegisterB64 >= 8 )
		rexPrefix |= 0x02; // REX.X - extends memRegisterB64
	if( memRegisterA64 >= 8 )
		rexPrefix |= 0x01; // REX.B - extends memRegisterA64
	// no prefix is written when none of the registers needs an extension bit
	if( rexPrefix != 0x40 )
		x64Gen_writeU8(x64GenContext, rexPrefix);

	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x38);
	x64Gen_writeU8(x64GenContext, 0xF0);
	_x64Gen_writeMODRMDeprecated(x64GenContext, dstRegister, memRegisterA64, memRegisterB64, memImmS32);
}
|
||||
|
||||
// 16bit variant of the MOVBE load: writes the 0x66 operand-size prefix and then delegates to
// x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64, which emits the remaining bytes ([REX] 0F 38 F0 + ModRM).
void x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32)
|
||||
{
|
||||
// MOVBE <dstReg64> (low word), WORD [<reg64> + <reg64> + <imm32>]
|
||||
// note: Unlike the 32bit version this instruction does not set the upper 32bits of the 64bit register to 0
|
||||
x64Gen_writeU8(x64GenContext, 0x66); // 16bit prefix
|
||||
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, dstRegister, memRegisterA64, memRegisterB64, memImmS32);
|
||||
}
|
||||
|
||||
// Emits MOVBE DWORD [<memRegisterA64> + <memRegisterB64> + <imm32>], <srcReg64> (low dword)
// Byte sequence: [REX] 0F 38 F1 <ModRM/SIB/displacement written by _x64Gen_writeMODRMDeprecated>
//   memRegisterA64 - first address register; selects REX bit 0x01 when >= 8
//   memRegisterB64 - second address register; selects REX bit 0x02 when >= 8
//   memImmS32      - signed 32bit displacement
//   srcRegister    - source register (0-15)
void x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister)
{
	// Build the REX prefix from its individual extension bits.
	// The previous if/else ladder emitted 0x42 for (srcRegister >= 8 && memRegisterB64 >= 8),
	// dropping the 0x04 bit of the source register; the correct value is 0x46.
	uint8 rexPrefix = 0x40;
	if( srcRegister >= 8 )
		rexPrefix |= 0x04; // REX.R - extends the data register
	if( memRegisterB64 >= 8 )
		rexPrefix |= 0x02; // REX.X - extends memRegisterB64
	if( memRegisterA64 >= 8 )
		rexPrefix |= 0x01; // REX.B - extends memRegisterA64
	// no prefix is written when none of the registers needs an extension bit
	if( rexPrefix != 0x40 )
		x64Gen_writeU8(x64GenContext, rexPrefix);

	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x38);
	x64Gen_writeU8(x64GenContext, 0xF1);
	_x64Gen_writeMODRMDeprecated(x64GenContext, srcRegister, memRegisterA64, memRegisterB64, memImmS32);
}
|
||||
|
||||
void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
{
	// SHRX reg64, reg64, reg64
	// Three-byte VEX encoding: C4 <byte1> <byte2> F7 <modrm>
	// The extension bits in byte1 and the register field in byte2 are stored inverted,
	// which is why the values are subtracted from an all-ones starting pattern.
	sint32 vexByte1 = 0xE2;
	if( registerDst >= 8 )
		vexByte1 -= 0x80; // destination is an extended register
	if( registerA >= 8 )
		vexByte1 -= 0x20; // registerA (encoded in the rm field) is an extended register
	const sint32 vexByte2 = 0xFB - registerB * 8; // registerB is encoded inverted in bits 3-6
	const sint32 modrm = 0xC0 + (registerDst & 7) * 8 + (registerA & 7); // register-direct form

	x64Gen_writeU8(x64GenContext, 0xC4);
	x64Gen_writeU8(x64GenContext, vexByte1);
	x64Gen_writeU8(x64GenContext, vexByte2);
	x64Gen_writeU8(x64GenContext, 0xF7);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
{
	// SHLX reg64, reg64, reg64
	// Three-byte VEX encoding: C4 <byte1> <byte2> F7 <modrm>
	// Differs from the SHRX emitter only in the base value of byte2 (0xF9 instead of 0xFB).
	sint32 vexByte1 = 0xE2;
	if( registerDst >= 8 )
		vexByte1 -= 0x80; // destination is an extended register
	if( registerA >= 8 )
		vexByte1 -= 0x20; // registerA (encoded in the rm field) is an extended register
	const sint32 vexByte2 = 0xF9 - registerB * 8; // registerB is encoded inverted in bits 3-6
	const sint32 modrm = 0xC0 + (registerDst & 7) * 8 + (registerA & 7); // register-direct form

	x64Gen_writeU8(x64GenContext, 0xC4);
	x64Gen_writeU8(x64GenContext, vexByte1);
	x64Gen_writeU8(x64GenContext, vexByte2);
	x64Gen_writeU8(x64GenContext, 0xF7);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
1244
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp
Normal file
1244
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp
Normal file
File diff suppressed because it is too large
Load diff
1885
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp
Normal file
1885
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp
Normal file
File diff suppressed because it is too large
Load diff
752
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp
Normal file
752
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp
Normal file
|
@ -0,0 +1,752 @@
|
|||
#include "PPCRecompiler.h"
|
||||
#include "PPCRecompilerIml.h"
|
||||
#include "PPCRecompilerX64.h"
|
||||
|
||||
void x64Gen_genSSEVEXPrefix2(x64GenContext_t* x64GenContext, sint32 xmmRegister1, sint32 xmmRegister2, bool use64BitMode)
{
	// Writes the optional 0x4X prefix byte for an instruction with two register operands.
	//   bit 0 (0x01): set when xmmRegister1 >= 8
	//   bit 2 (0x04): set when xmmRegister2 >= 8
	//   bit 3 (0x08): set when use64BitMode is true
	// If none of the three bits would be set, no byte is written at all.
	const bool reg1IsExtended = (xmmRegister1 >= 8);
	const bool reg2IsExtended = (xmmRegister2 >= 8);
	if( !reg1IsExtended && !reg2IsExtended && !use64BitMode )
		return;
	const uint8 prefix = static_cast<uint8>(0x40
		| (reg1IsExtended ? 0x01 : 0x00)
		| (reg2IsExtended ? 0x04 : 0x00)
		| (use64BitMode ? 0x08 : 0x00));
	x64Gen_writeU8(x64GenContext, prefix);
}
|
||||
|
||||
void x64Gen_genSSEVEXPrefix1(x64GenContext_t* x64GenContext, sint32 xmmRegister, bool use64BitMode)
{
	// Writes the optional 0x4X prefix byte for an instruction with a single xmm register operand.
	//   bit 0 (0x01): set when use64BitMode is true
	//   bit 2 (0x04): set when xmmRegister >= 8
	// If neither bit would be set, no byte is written at all.
	// note: Unlike x64Gen_genSSEVEXPrefix2(), use64BitMode controls bit 0x01 here and not bit 0x08.
	//       Callers in this file pass true when the memory operand is addressed through R15.
	const bool regIsExtended = (xmmRegister >= 8);
	if( !regIsExtended && !use64BitMode )
		return;
	const uint8 prefix = static_cast<uint8>(0x40
		| (use64BitMode ? 0x01 : 0x00)
		| (regIsExtended ? 0x04 : 0x00));
	x64Gen_writeU8(x64GenContext, prefix);
}
|
||||
|
||||
void x64Gen_movaps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSource)
{
	// SSE
	// MOVAPS <xmmDest>, <xmmSource>
	// copies one xmm register into another
	// encoding: [prefix] 0F 28 /r (alternative encoding: 0F 29 with source and destination exchanged)
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSource & 7)); // register-direct, reg=dest, rm=source
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSource, xmmRegisterDest, false); // tested; only writes a byte if a register index is >= 8
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x28);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_movupd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE2
	// MOVUPD <xmm>, [<reg>+<imm>]
	// move two doubles from memory into xmm register
	// Only the stack pointer is implemented as base register. Every other value of memRegister
	// (including REG_NONE, the absolute address form) ends in assert_dbg() without emitting anything.
	if( memRegister != REG_ESP )
	{
		assert_dbg();
		return;
	}
	// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
	// example: 66 0F 10 84 E4 23 01 00 00
	const uint8 modrm = static_cast<uint8>(0x84 | ((xmmRegister & 7) << 3)); // 32bit displacement, SIB byte follows
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegister, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x10);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU8(x64GenContext, 0xE4); // SIB: base = stack pointer, no index register
	x64Gen_writeU32(x64GenContext, memImmU32);
}
|
||||
|
||||
void x64Gen_movupd_memReg128_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE2
	// MOVUPD [<reg>+<imm>], <xmm>
	// store the two doubles of an xmm register to memory (opcode 0F 11, the store counterpart of 0F 10)
	// Only the stack pointer is implemented as base register. Every other value of memRegister
	// (including REG_NONE, the absolute address form) ends in assert_dbg() without emitting anything.
	if( memRegister != REG_ESP )
	{
		assert_dbg();
		return;
	}
	// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
	const uint8 modrm = static_cast<uint8>(0x84 | ((xmmRegister & 7) << 3)); // 32bit displacement, SIB byte follows
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegister, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x11);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU8(x64GenContext, 0xE4); // SIB: base = stack pointer, no index register
	x64Gen_writeU32(x64GenContext, memImmU32);
}
|
||||
|
||||
void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE3
	// MOVDDUP <xmm>, [<reg>+<imm>]
	// move one double from memory into lower and upper half of a xmm register
	// Implemented base registers: RSP and R15. Anything else (including REG_NONE) ends in assert_dbg().
	// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
	const sint32 regField = (xmmRegister & 7) << 3;
	if( memRegister == REG_RSP )
	{
		x64Gen_writeU8(x64GenContext, 0xF2);
		const bool xmmIsExtended = (xmmRegister >= 8);
		if( xmmIsExtended )
			x64Gen_writeU8(x64GenContext, 0x44);
		x64Gen_writeU8(x64GenContext, 0x0F);
		x64Gen_writeU8(x64GenContext, 0x12);
		x64Gen_writeU8(x64GenContext, static_cast<uint8>(0x84 | regField)); // 32bit displacement, SIB byte follows
		x64Gen_writeU8(x64GenContext, 0xE4); // SIB: base = RSP, no index register
		x64Gen_writeU32(x64GenContext, memImmU32);
		return;
	}
	if( memRegister == REG_R15 )
	{
		// example: F2 41 0F 12 87 - 44 33 22 11
		x64Gen_writeU8(x64GenContext, 0xF2);
		x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegister, true); // true -> prefix bit 0x01, required to address R15
		x64Gen_writeU8(x64GenContext, 0x0F);
		x64Gen_writeU8(x64GenContext, 0x12);
		x64Gen_writeU8(x64GenContext, static_cast<uint8>(0x87 | regField)); // 32bit displacement, no SIB byte
		x64Gen_writeU32(x64GenContext, memImmU32);
		return;
	}
	// MOVDDUP <xmm>, [<imm>] (REG_NONE) and all other base registers are not implemented
	assert_dbg();
}
|
||||
|
||||
void x64Gen_movddup_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE3
	// MOVDDUP <xmmDest>, <xmmSrc>
	// move low double from xmm register into lower and upper half of a different xmm register
	// encoding: F2 [prefix] 0F 12 /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x12);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_movhlps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE1
	// MOVHLPS <xmmDest>, <xmmSrc>
	// move the high 64 bits of the source register into the low 64 bits of the destination register
	// encoding: [prefix] 0F 12 /r (same opcode as MOVDDUP, but without the F2 prefix)
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x12);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_movsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// MOVSD <xmmDest>, <xmmSrc>
	// move lower double from xmm register into lower half of a different xmm register, leave other half untouched
	// encoding: F2 [prefix] 0F 10 /r (alternative encoding: 0F 11 with src and dest exchanged)
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x10);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE2
	// move lower 64bits (double) of xmm register to memory location
	// Only RSP is implemented as base register.
	if( memRegister == REG_NONE )
	{
		// MOVSD [<imm>], <xmm> (would be F2 0F 11 05 - 45 23 01 00) is not implemented
		assert_dbg();
		return;
	}
	if( memRegister != REG_RSP )
	{
		assert_dbg();
		return;
	}
	// MOVSD [RSP+<imm>], <xmm>
	// example: F2 0F 11 84 24 - 33 22 11 00
	const uint8 modrm = static_cast<uint8>(0x84 | ((xmmRegister & 7) << 3)); // 32bit displacement, SIB byte follows
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, 0, xmmRegister, false); // first register is fixed to 0, so only xmmRegister can cause a prefix byte
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x11);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU8(x64GenContext, 0x24); // SIB: base = RSP, no index register
	x64Gen_writeU32(x64GenContext, memImmU32);
}
|
||||
|
||||
void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE2
	// move one double from memory into lower half of a xmm register, leave upper half unchanged(?)
	// Only RSP is implemented as base register.
	if( memRegister == REG_NONE )
	{
		// MOVLPD <xmm>, [<imm>] is not implemented
		assert_dbg();
		return;
	}
	if( memRegister != REG_RSP )
	{
		assert_dbg();
		return;
	}
	// MOVLPD <xmm>, [<reg64>+<imm>]
	// example: 66 0F 12 84 24 - 33 22 11 00
	const uint8 modrm = static_cast<uint8>(0x84 | ((xmmRegister & 7) << 3)); // 32bit displacement, SIB byte follows
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, 0, xmmRegister, false); // first register is fixed to 0, so only xmmRegister can cause a prefix byte
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x12);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU8(x64GenContext, 0x24); // SIB: base = RSP, no index register
	x64Gen_writeU32(x64GenContext, memImmU32);
}
|
||||
|
||||
void x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// UNPCKLPD <xmmDest>, <xmmSrc>
	// encoding: 66 [prefix] 0F 14 /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x14);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// UNPCKHPD <xmmDest>, <xmmSrc>
	// encoding: 66 [prefix] 0F 15 /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x15);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc, uint8 imm8)
{
	// SSE2
	// SHUFPD <xmmDest>, <xmmSrc>, <imm8>
	// shuffled copy source to destination, imm8 is the selector byte that trails the instruction
	// encoding: 66 [prefix] 0F C6 /r ib
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0xC6);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU8(x64GenContext, imm8);
}
|
||||
|
||||
void x64Gen_addsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// ADDSD <xmmDest>, <xmmSrc>
	// add bottom double of two xmm registers, leave upper quadword unchanged
	// encoding: F2 [prefix] 0F 58 /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false); // untested
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x58);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_addpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// ADDPD <xmmDest>, <xmmSrc>
	// add both doubles of two xmm registers
	// encoding: 66 [prefix] 0F 58 /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x58);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_subsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// SUBSD <xmmDest>, <xmmSrc>
	// subtract bottom double of two xmm registers, leave upper quadword unchanged
	// encoding: F2 [prefix] 0F 5C /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5C);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_subpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// SUBPD <xmmDest>, <xmmSrc>
	// subtract both doubles of two xmm registers
	// encoding: 66 [prefix] 0F 5C /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false); // untested
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5C);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_mulsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// MULSD <xmmDest>, <xmmSrc>
	// multiply bottom double of two xmm registers, leave upper quadword unchanged
	// encoding: F2 [prefix] 0F 59 /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x59);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_mulpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// MULPD <xmmDest>, <xmmSrc>
	// multiply both doubles of two xmm registers
	// encoding: 66 [prefix] 0F 59 /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false); // untested
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x59);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_mulpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE2
	// MULPD <xmm>, [R14+<imm32>]
	// Only R14 is implemented as base register; REG_NONE and every other register end in assert_dbg().
	if( memRegister == REG_NONE )
	{
		assert_dbg();
		return;
	}
	if( memRegister != REG_R14 )
	{
		assert_dbg();
		return;
	}
	uint8 prefix = 0x41; // bit 0x01 is always required to address R14
	if( xmmRegister >= 8 )
		prefix |= 0x04; // xmm8-xmm15
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_writeU8(x64GenContext, prefix);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x59);
	x64Gen_writeU8(x64GenContext, static_cast<uint8>(0x86 | ((xmmRegister & 7) << 3))); // 32bit displacement, no SIB byte
	x64Gen_writeU32(x64GenContext, memImmU32);
}
|
||||
|
||||
void x64Gen_divsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// DIVSD <xmmDest>, <xmmSrc>
	// divide bottom double of two xmm registers, leave upper quadword unchanged
	// encoding: F2 [prefix] 0F 5E /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5E);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_divpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// DIVPD <xmmDest>, <xmmSrc>
	// divide bottom and top double of two xmm registers
	// encoding: 66 [prefix] 0F 5E /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5E);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_comisd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// COMISD <xmmDest>, <xmmSrc>
	// compare bottom doubles
	// encoding: 66 [prefix] 0F 2F /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false); // untested
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2F);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 memoryReg, sint32 memImmS32)
{
	// SSE2
	// COMISD <xmm>, [R15+<imm32>]
	// compare bottom double with double from memory location
	// Only R15 is implemented as base register; everything else ends in assert_dbg().
	if( memoryReg != REG_R15 )
	{
		assert_dbg();
		return;
	}
	const uint8 modrm = static_cast<uint8>(0x87 | ((xmmRegisterDest & 7) << 3)); // 32bit displacement, no SIB byte
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); // true -> prefix bit 0x01, required to address R15
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2F);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU32(x64GenContext, (uint32)memImmS32);
}
|
||||
|
||||
void x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// UCOMISD <xmmDest>, <xmmSrc>
	// compare bottom doubles
	// encoding: 66 [prefix] 0F 2E /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2E);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_comiss_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 memoryReg, sint32 memImmS32)
{
	// SSE
	// COMISS <xmm>, [R15+<imm32>]
	// compare bottom float with float from memory location
	// Same bytes as the COMISD memory form, minus the leading 0x66.
	// Only R15 is implemented as base register; everything else ends in assert_dbg().
	if( memoryReg != REG_R15 )
	{
		assert_dbg();
		return;
	}
	const uint8 modrm = static_cast<uint8>(0x87 | ((xmmRegisterDest & 7) << 3)); // 32bit displacement, no SIB byte
	x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); // true -> prefix bit 0x01, required to address R15
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2F);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU32(x64GenContext, (uint32)memImmS32);
}
|
||||
|
||||
void x64Gen_orps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32)
{
	// SSE
	// ORPS <xmm>, [R15+<imm32>]
	// or xmm register with 128 bit value from memory
	// Only R15 is implemented as base register; everything else ends in assert_dbg().
	if( memReg != REG_R15 )
	{
		assert_dbg();
		return;
	}
	const uint8 modrm = static_cast<uint8>(0x87 | ((xmmRegisterDest & 7) << 3)); // 32bit displacement, no SIB byte
	x64Gen_genSSEVEXPrefix2(x64GenContext, memReg, xmmRegisterDest, false); // memReg is >= 8 here, so prefix bit 0x01 is always set
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x56);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU32(x64GenContext, (uint32)memImmS32);
}
|
||||
|
||||
void x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32)
{
	// SSE
	// XORPS <xmm>, [R15+<imm32>]
	// xor xmm register with 128 bit value from memory
	// Only R15 is implemented as base register; everything else ends in assert_dbg().
	if( memReg != REG_R15 )
	{
		assert_dbg();
		return;
	}
	const uint8 modrm = static_cast<uint8>(0x87 | ((xmmRegisterDest & 7) << 3)); // 32bit displacement, no SIB byte
	x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); // todo: should be x64Gen_genSSEVEXPrefix2() with memReg?
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x57);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU32(x64GenContext, (uint32)memImmS32);
}
|
||||
|
||||
void x64Gen_andpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE2
	// ANDPD <xmm>, [R14+<imm32>]
	// Only R14 is implemented as base register; REG_NONE and every other register end in assert_dbg().
	if( memRegister == REG_NONE )
	{
		assert_dbg();
		return;
	}
	if( memRegister != REG_R14 )
	{
		assert_dbg();
		return;
	}
	uint8 prefix = 0x41; // bit 0x01 is always required to address R14
	if( xmmRegister >= 8 )
		prefix |= 0x04; // xmm8-xmm15
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_writeU8(x64GenContext, prefix);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x54);
	x64Gen_writeU8(x64GenContext, static_cast<uint8>(0x86 | ((xmmRegister & 7) << 3))); // 32bit displacement, no SIB byte
	x64Gen_writeU32(x64GenContext, memImmU32);
}
|
||||
|
||||
void x64Gen_andps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32)
{
	// SSE
	// ANDPS <xmm>, [R15+<imm32>]
	// and xmm register with 128 bit value from memory
	// Only R15 is implemented as base register; everything else ends in assert_dbg().
	if( memReg != REG_R15 )
	{
		assert_dbg();
		return;
	}
	const uint8 modrm = static_cast<uint8>(0x87 | ((xmmRegisterDest & 7) << 3)); // 32bit displacement, no SIB byte
	x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); // todo: should be x64Gen_genSSEVEXPrefix2() with memReg?
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x54);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU32(x64GenContext, (uint32)memImmS32);
}
|
||||
|
||||
void x64Gen_andps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE
	// ANDPS <xmmDest>, <xmmSrc>
	// and xmm register with xmm register
	// encoding: [prefix] 0F 54 /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x54);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32)
{
	// SSE2
	// PCMPEQD <xmm>, [R15+<imm32>]
	// doubleword integer compare
	// Only R15 is implemented as base register; everything else ends in assert_dbg().
	if( memReg != REG_R15 )
	{
		assert_dbg();
		return;
	}
	const uint8 modrm = static_cast<uint8>(0x87 | ((xmmRegisterDest & 7) << 3)); // 32bit displacement, no SIB byte
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); // true -> prefix bit 0x01, required to address R15
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x76);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU32(x64GenContext, (uint32)memImmS32);
}
|
||||
|
||||
void x64Gen_cvttpd2dq_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// CVTTPD2DQ <xmmDest>, <xmmSrc>
	// convert two doubles into two 32-bit integers in bottom part of xmm register, reset upper 64 bits of destination register
	// encoding: 66 [prefix] 0F E6 /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0xE6);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// CVTTSD2SI <reg32>, <xmmSrc>
	// convert double to truncated integer in general purpose register
	// note: despite the function name the destination is a general purpose register, not an xmm register
	// encoding: F2 [prefix] 0F 2C /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((registerDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, registerDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2C);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// CVTSD2SS <xmmDest>, <xmmSrc>
	// converts bottom 64bit double to bottom 32bit single
	// encoding: F2 [prefix] 0F 5A /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5A);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// CVTPD2PS <xmmDest>, <xmmSrc>
	// converts two 64bit doubles to two 32bit singles in bottom half of register
	// encoding: 66 [prefix] 0F 5A /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5A);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// CVTPS2PD <xmmDest>, <xmmSrc>
	// converts two 32bit singles to two 64bit doubles
	// encoding: [prefix] 0F 5A /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5A);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// CVTSS2SD <xmmDest>, <xmmSrc>
	// converts bottom 32bit single to bottom 64bit double
	// encoding: F3 [prefix] 0F 5A /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF3);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5A);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_cvtpi2pd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 memReg, sint32 memImmS32)
{
	// SSE2
	// CVTPI2PD <xmm>, [RSP+<imm32>]
	// converts two signed 32bit integers to two doubles
	// Only RSP is implemented as base register; everything else ends in assert_dbg().
	if( memReg != REG_RSP )
	{
		assert_dbg();
		return;
	}
	const uint8 modrm = static_cast<uint8>(0x84 | ((xmmRegisterDest & 7) << 3)); // 32bit displacement, SIB byte follows
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2A);
	x64Gen_writeU8(x64GenContext, modrm);
	x64Gen_writeU8(x64GenContext, 0x24); // SIB: base = RSP, no index register
	x64Gen_writeU32(x64GenContext, (uint32)memImmS32);
}
|
||||
|
||||
void x64Gen_cvtsd2si_reg64Low_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// CVTSD2SI <reg32>, <xmmSrc>
	// converts bottom 64bit double to 32bit signed integer in general purpose register, round based on float-point control
	// encoding: F2 [prefix] 0F 2D /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((registerDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, registerDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2D);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// CVTTSD2SI <reg32>, <xmmSrc>
	// converts bottom 64bit double to 32bit signed integer in general purpose register, always truncate
	// encoding: F2 [prefix] 0F 2C /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((registerDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, registerDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2C);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_sqrtsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// SQRTSD <xmmDest>, <xmmSrc>
	// calculates square root of bottom double
	// encoding: F2 [prefix] 0F 51 /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x51);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_sqrtpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// SQRTPD <xmmDest>, <xmmSrc>
	// calculates square root of bottom and top double
	// encoding: 66 [prefix] 0F 51 /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x51);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_rcpss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE
	// RCPSS <xmmDest>, <xmmSrc>
	// approximates reciprocal of bottom 32bit single
	// encoding: F3 [prefix] 0F 53 /r
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (xmmRegisterSrc & 7)); // register-direct, reg=dest, rm=src
	x64Gen_writeU8(x64GenContext, 0xF3);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x53);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_mulss_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE
	// MULSS <xmm>, [<register 15>+<imm32>]
	// Only register index 15 is implemented as base register; REG_NONE and every other register end in assert_dbg().
	if( memRegister == REG_NONE )
	{
		assert_dbg();
		return;
	}
	if( memRegister != 15 )
	{
		assert_dbg();
		return;
	}
	uint8 prefix = 0x41; // bit 0x01 is always required to address register 15
	if( xmmRegister >= 8 )
		prefix |= 0x04; // xmm8-xmm15
	x64Gen_writeU8(x64GenContext, 0xF3);
	x64Gen_writeU8(x64GenContext, prefix);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x59);
	x64Gen_writeU8(x64GenContext, static_cast<uint8>(0x87 | ((xmmRegister & 7) << 3))); // 32bit displacement, no SIB byte
	x64Gen_writeU32(x64GenContext, memImmU32);
}
|
||||
|
||||
void x64Gen_movd_xmmReg_reg64Low32(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc)
{
	// SSE2
	// MOVD <xmm>, <reg32>
	// copy low 32bit of general purpose register into xmm register
	// encoding: 66 [prefix] 0F 6E /r (the opposite direction uses opcode 0F 7E)
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterDest & 7) << 3) | (registerSrc & 7)); // register-direct, reg=xmm, rm=general purpose register
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, registerSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x6E);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_movd_reg64Low32_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// MOVD <reg32>, <xmm>
	// copy low 32bit of xmm register into general purpose register
	// encoding: 66 [prefix] 0F 7E /r (the opposite direction uses opcode 0F 6E)
	// note: the xmm register stays in the reg field and the general purpose register in the rm field, although the xmm register is the source here
	const uint8 modrm = static_cast<uint8>(0xC0 | ((xmmRegisterSrc & 7) << 3) | (registerDest & 7));
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, registerDest, xmmRegisterSrc, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x7E);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_movq_xmmReg_reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc)
{
	// MOVQ <xmm>, <reg64>
	// moves a 64bit general purpose register into an xmm register
	// same opcode as the 32bit MOVD form, but the prefix helper is called with its last argument set to true
	// (presumably selecting the 64bit operand size via REX.W - confirm against x64Gen_genSSEVEXPrefix2)

	// register-direct ModRM: xmm destination in the reg field, GPR source in the rm field
	const int modrm = 0xC0 | ((xmmRegisterDest & 7) << 3) | (registerSrc & 7);

	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, registerSrc, xmmRegisterDest, true);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x6E);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_movq_reg64_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 xmmRegisterSrc)
{
	// MOVQ <reg64>, <xmm>
	// moves the low 64bit of an xmm register into a general purpose register
	// same opcode as the 32bit MOVD form, but the prefix helper is called with its last argument set to true
	// (presumably selecting the 64bit operand size via REX.W - confirm against x64Gen_genSSEVEXPrefix2)

	// register-direct ModRM: xmm source in the reg field, GPR destination in the rm field
	const int modrm = 0xC0 | ((xmmRegisterSrc & 7) << 3) | (registerDst & 7);

	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, registerDst, xmmRegisterSrc, true);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x7E);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
360
src/Cafe/HW/Espresso/Recompiler/x64Emit.hpp
Normal file
360
src/Cafe/HW/Espresso/Recompiler/x64Emit.hpp
Normal file
|
@ -0,0 +1,360 @@
|
|||
|
||||
|
||||
template<uint8 op0, bool rex64Bit = false>
|
||||
class x64_opc_1byte
|
||||
{
|
||||
public:
|
||||
static void emitBytes(x64GenContext_t* x64GenContext)
|
||||
{
|
||||
// write out op0
|
||||
x64Gen_writeU8(x64GenContext, op0);
|
||||
}
|
||||
|
||||
static constexpr bool isRevOrder()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static constexpr bool hasRex64BitPrefix()
|
||||
{
|
||||
return rex64Bit;
|
||||
}
|
||||
};
|
||||
|
||||
template<uint8 op0, bool rex64Bit = false>
|
||||
class x64_opc_1byte_rev
|
||||
{
|
||||
public:
|
||||
static void emitBytes(x64GenContext_t* x64GenContext)
|
||||
{
|
||||
// write out op0
|
||||
x64Gen_writeU8(x64GenContext, op0);
|
||||
}
|
||||
|
||||
static constexpr bool isRevOrder()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static constexpr bool hasRex64BitPrefix()
|
||||
{
|
||||
return rex64Bit;
|
||||
}
|
||||
};
|
||||
|
||||
template<uint8 op0, uint8 op1, bool rex64Bit = false>
|
||||
class x64_opc_2byte
|
||||
{
|
||||
public:
|
||||
static void emitBytes(x64GenContext_t* x64GenContext)
|
||||
{
|
||||
x64Gen_writeU8(x64GenContext, op0);
|
||||
x64Gen_writeU8(x64GenContext, op1);
|
||||
}
|
||||
|
||||
static constexpr bool isRevOrder()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static constexpr bool hasRex64BitPrefix()
|
||||
{
|
||||
return rex64Bit;
|
||||
}
|
||||
};
|
||||
|
||||
// Kind of operand handed to the ModRM writer
enum class MODRM_OPR_TYPE
{
	REG, // operand is a register
	MEM  // operand is a memory reference
};
|
||||
|
||||
class x64MODRM_opr_reg64
|
||||
{
|
||||
public:
|
||||
x64MODRM_opr_reg64(uint8 reg)
|
||||
{
|
||||
this->reg = reg;
|
||||
}
|
||||
|
||||
static constexpr MODRM_OPR_TYPE getType()
|
||||
{
|
||||
return MODRM_OPR_TYPE::REG;
|
||||
}
|
||||
|
||||
const uint8 getReg() const
|
||||
{
|
||||
return reg;
|
||||
}
|
||||
|
||||
private:
|
||||
uint8 reg;
|
||||
};
|
||||
|
||||
class x64MODRM_opr_memReg64
|
||||
{
|
||||
public:
|
||||
x64MODRM_opr_memReg64(uint8 reg)
|
||||
{
|
||||
this->reg = reg;
|
||||
this->offset = 0;
|
||||
}
|
||||
|
||||
x64MODRM_opr_memReg64(uint8 reg, sint32 offset)
|
||||
{
|
||||
this->reg = reg;
|
||||
this->offset = offset;
|
||||
}
|
||||
|
||||
static constexpr MODRM_OPR_TYPE getType()
|
||||
{
|
||||
return MODRM_OPR_TYPE::MEM;
|
||||
}
|
||||
|
||||
const uint8 getBaseReg() const
|
||||
{
|
||||
return reg;
|
||||
}
|
||||
|
||||
const uint32 getOffset() const
|
||||
{
|
||||
return (uint32)offset;
|
||||
}
|
||||
|
||||
static constexpr bool hasBaseReg()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static constexpr bool hasIndexReg()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
private:
|
||||
uint8 reg;
|
||||
sint32 offset;
|
||||
};
|
||||
|
||||
class x64MODRM_opr_memRegPlusReg
|
||||
{
|
||||
public:
|
||||
x64MODRM_opr_memRegPlusReg(uint8 regBase, uint8 regIndex)
|
||||
{
|
||||
if ((regIndex & 7) == 4)
|
||||
{
|
||||
// cant encode RSP/R12 in index register, switch with base register
|
||||
// this only works if the scaler is 1
|
||||
std::swap(regBase, regIndex);
|
||||
cemu_assert((regBase & 7) != 4);
|
||||
}
|
||||
this->regBase = regBase;
|
||||
this->regIndex = regIndex;
|
||||
this->offset = 0;
|
||||
}
|
||||
|
||||
x64MODRM_opr_memRegPlusReg(uint8 regBase, uint8 regIndex, sint32 offset)
|
||||
{
|
||||
if ((regIndex & 7) == 4)
|
||||
{
|
||||
std::swap(regBase, regIndex);
|
||||
cemu_assert((regIndex & 7) != 4);
|
||||
}
|
||||
this->regBase = regBase;
|
||||
this->regIndex = regIndex;
|
||||
this->offset = offset;
|
||||
}
|
||||
|
||||
static constexpr MODRM_OPR_TYPE getType()
|
||||
{
|
||||
return MODRM_OPR_TYPE::MEM;
|
||||
}
|
||||
|
||||
const uint8 getBaseReg() const
|
||||
{
|
||||
return regBase;
|
||||
}
|
||||
|
||||
const uint8 getIndexReg()
|
||||
{
|
||||
return regIndex;
|
||||
}
|
||||
|
||||
const uint32 getOffset() const
|
||||
{
|
||||
return (uint32)offset;
|
||||
}
|
||||
|
||||
static constexpr bool hasBaseReg()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static constexpr bool hasIndexReg()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
private:
|
||||
uint8 regBase;
|
||||
uint8 regIndex; // multiplied by scaler which is fixed to 1
|
||||
sint32 offset;
|
||||
};
|
||||
|
||||
template<class opcodeBytes, typename TA, typename TB>
|
||||
void _x64Gen_writeMODRM_internal(x64GenContext_t* x64GenContext, TA opA, TB opB)
|
||||
{
|
||||
static_assert(TA::getType() == MODRM_OPR_TYPE::REG);
|
||||
x64Gen_checkBuffer(x64GenContext);
|
||||
// REX prefix
|
||||
// 0100 WRXB
|
||||
if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::REG)
|
||||
{
|
||||
if (opA.getReg() & 8 || opB.getReg() & 8 || opcodeBytes::hasRex64BitPrefix())
|
||||
{
|
||||
// opA -> REX.B
|
||||
// baseReg -> REX.R
|
||||
x64Gen_writeU8(x64GenContext, 0x40 | ((opA.getReg() & 8) ? (1 << 2) : 0) | ((opB.getReg() & 8) ? (1 << 0) : 0) | (opcodeBytes::hasRex64BitPrefix() ? (1 << 3) : 0));
|
||||
}
|
||||
}
|
||||
else if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::MEM)
|
||||
{
|
||||
if constexpr (opB.hasBaseReg() && opB.hasIndexReg())
|
||||
{
|
||||
if (opA.getReg() & 8 || opB.getBaseReg() & 8 || opB.getIndexReg() & 8 || opcodeBytes::hasRex64BitPrefix())
|
||||
{
|
||||
// opA -> REX.B
|
||||
// baseReg -> REX.R
|
||||
// indexReg -> REX.X
|
||||
x64Gen_writeU8(x64GenContext, 0x40 | ((opA.getReg() & 8) ? (1 << 2) : 0) | ((opB.getBaseReg() & 8) ? (1 << 0) : 0) | ((opB.getIndexReg() & 8) ? (1 << 1) : 0) | (opcodeBytes::hasRex64BitPrefix() ? (1 << 3) : 0));
|
||||
}
|
||||
}
|
||||
else if constexpr (opB.hasBaseReg())
|
||||
{
|
||||
if (opA.getReg() & 8 || opB.getBaseReg() & 8 || opcodeBytes::hasRex64BitPrefix())
|
||||
{
|
||||
// opA -> REX.B
|
||||
// baseReg -> REX.R
|
||||
x64Gen_writeU8(x64GenContext, 0x40 | ((opA.getReg() & 8) ? (1 << 2) : 0) | ((opB.getBaseReg() & 8) ? (1 << 0) : 0) | (opcodeBytes::hasRex64BitPrefix() ? (1 << 3) : 0));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (opA.getReg() & 8 || opcodeBytes::hasRex64BitPrefix())
|
||||
{
|
||||
// todo - verify
|
||||
// opA -> REX.B
|
||||
x64Gen_writeU8(x64GenContext, 0x40 | ((opA.getReg() & 8) ? (1 << 2) : 0) | (opcodeBytes::hasRex64BitPrefix() ? (1 << 3) : 0));
|
||||
}
|
||||
}
|
||||
}
|
||||
// opcode
|
||||
opcodeBytes::emitBytes(x64GenContext);
|
||||
// modrm byte
|
||||
if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::REG)
|
||||
{
|
||||
// reg, reg
|
||||
x64Gen_writeU8(x64GenContext, 0xC0 + (opB.getReg() & 7) + ((opA.getReg() & 7) << 3));
|
||||
}
|
||||
else if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::MEM)
|
||||
{
|
||||
if constexpr (TB::hasBaseReg() == false) // todo - also check for index reg and secondary sib reg
|
||||
{
|
||||
// form: [offset]
|
||||
// instruction is just offset
|
||||
cemu_assert(false);
|
||||
}
|
||||
else if constexpr (TB::hasIndexReg())
|
||||
{
|
||||
// form: [base+index*scaler+offset], scaler is currently fixed to 1
|
||||
cemu_assert((opB.getIndexReg() & 7) != 4); // RSP not allowed as index register
|
||||
const uint32 offset = opB.getOffset();
|
||||
if (offset == 0 && (opB.getBaseReg() & 7) != 5) // RBP/R13 has special meaning in no-offset encoding
|
||||
{
|
||||
// [form: index*1+base]
|
||||
x64Gen_writeU8(x64GenContext, 0x00 + (4) + ((opA.getReg() & 7) << 3));
|
||||
// SIB byte
|
||||
x64Gen_writeU8(x64GenContext, ((opB.getIndexReg()&7) << 3) + (opB.getBaseReg() & 7));
|
||||
}
|
||||
else if (offset == (uint32)(sint32)(sint8)offset)
|
||||
{
|
||||
// [form: index*1+base+sbyte]
|
||||
x64Gen_writeU8(x64GenContext, 0x40 + (4) + ((opA.getReg() & 7) << 3));
|
||||
// SIB byte
|
||||
x64Gen_writeU8(x64GenContext, ((opB.getIndexReg() & 7) << 3) + (opB.getBaseReg() & 7));
|
||||
x64Gen_writeU8(x64GenContext, (uint8)offset);
|
||||
}
|
||||
else
|
||||
{
|
||||
// [form: index*1+base+sdword]
|
||||
x64Gen_writeU8(x64GenContext, 0x80 + (4) + ((opA.getReg() & 7) << 3));
|
||||
// SIB byte
|
||||
x64Gen_writeU8(x64GenContext, ((opB.getIndexReg() & 7) << 3) + (opB.getBaseReg() & 7));
|
||||
x64Gen_writeU32(x64GenContext, (uint32)offset);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// form: [baseReg + offset]
|
||||
const uint32 offset = opB.getOffset();
|
||||
if (offset == 0 && (opB.getBaseReg() & 7) != 5) // RBP/R13 has special meaning in no-offset encoding
|
||||
{
|
||||
// form: [baseReg]
|
||||
// if base reg is RSP/R12 we need to use SIB form of instruction
|
||||
if ((opB.getBaseReg() & 7) == 4)
|
||||
{
|
||||
x64Gen_writeU8(x64GenContext, 0x00 + (4) + ((opA.getReg() & 7) << 3));
|
||||
// SIB byte [form: none*1+base]
|
||||
x64Gen_writeU8(x64GenContext, (4 << 3) + (opB.getBaseReg() & 7));
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Gen_writeU8(x64GenContext, 0x00 + (opB.getBaseReg() & 7) + ((opA.getReg() & 7) << 3));
|
||||
}
|
||||
}
|
||||
else if (offset == (uint32)(sint32)(sint8)offset)
|
||||
{
|
||||
// form: [baseReg+sbyte]
|
||||
// if base reg is RSP/R12 we need to use SIB form of instruction
|
||||
if ((opB.getBaseReg() & 7) == 4)
|
||||
{
|
||||
x64Gen_writeU8(x64GenContext, 0x40 + (4) + ((opA.getReg() & 7) << 3));
|
||||
// SIB byte [form: none*1+base]
|
||||
x64Gen_writeU8(x64GenContext, (4 << 3) + (opB.getBaseReg() & 7));
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Gen_writeU8(x64GenContext, 0x40 + (opB.getBaseReg() & 7) + ((opA.getReg() & 7) << 3));
|
||||
}
|
||||
x64Gen_writeU8(x64GenContext, (uint8)offset);
|
||||
}
|
||||
else
|
||||
{
|
||||
// form: [baseReg+sdword]
|
||||
// if base reg is RSP/R12 we need to use SIB form of instruction
|
||||
if ((opB.getBaseReg() & 7) == 4)
|
||||
{
|
||||
x64Gen_writeU8(x64GenContext, 0x80 + (4) + ((opA.getReg() & 7) << 3));
|
||||
// SIB byte [form: none*1+base]
|
||||
x64Gen_writeU8(x64GenContext, (4 << 3) + (opB.getBaseReg() & 7));
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Gen_writeU8(x64GenContext, 0x80 + (opB.getBaseReg() & 7) + ((opA.getReg() & 7) << 3));
|
||||
}
|
||||
x64Gen_writeU32(x64GenContext, (uint32)offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
assert_dbg();
|
||||
}
|
||||
}
|
||||
|
||||
template<class opcodeBytes, typename TA, typename TB>
|
||||
void x64Gen_writeMODRM_dyn(x64GenContext_t* x64GenContext, TA opLeft, TB opRight)
|
||||
{
|
||||
if constexpr (opcodeBytes::isRevOrder())
|
||||
_x64Gen_writeMODRM_internal<opcodeBytes, TB, TA>(x64GenContext, opRight, opLeft);
|
||||
else
|
||||
_x64Gen_writeMODRM_internal<opcodeBytes, TA, TB>(x64GenContext, opLeft, opRight);
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue