commit d60742f52b (parent e3db07a16a)
Add all the files

1445 changed files with 430238 additions and 0 deletions
src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h (new file, 293 lines)
@@ -0,0 +1,293 @@
#pragma once
#include "Cafe/HW/Espresso/EspressoISA.h"
#include "Cafe/HW/MMU/MMU.h"

bool GamePatch_IsNonReturnFunction(uint32 hleIndex);

// utility class to determine the shape of a function
class PPCFunctionBoundaryTracker
{
public:
    struct PPCRange_t
    {
        PPCRange_t() {};
        PPCRange_t(uint32 _startAddress) : startAddress(_startAddress) {};

        uint32 startAddress{};
        uint32 length{};
        //bool isProcessed{false};

        uint32 getEndAddress() const { return startAddress + length; };
    };

public:
    void trackStartPoint(MPTR startAddress)
    {
        processRange(startAddress, nullptr, nullptr);
        processBranchTargets();
    }

    bool getRangeForAddress(uint32 address, PPCRange_t& range)
    {
        for (auto itr : map_ranges)
        {
            if (address >= itr->startAddress && address < (itr->startAddress + itr->length))
            {
                range = *itr;
                return true;
            }
        }
        return false;
    }

private:
    void addBranchDestination(PPCRange_t* sourceRange, MPTR address)
    {
        map_branchTargets.emplace(address);
    }

    // process instruction flow
    // returns false if the IP cannot advance past the current instruction
    bool processInstruction(PPCRange_t* range, MPTR address)
    {
        // parse instruction
        uint32 opcode = memory_readU32(address);
        switch (Espresso::GetPrimaryOpcode(opcode))
        {
        case Espresso::PrimaryOpcode::ZERO:
        {
            if (opcode == 0)
                return false; // invalid instruction
            break;
        }
        case Espresso::PrimaryOpcode::VIRTUAL_HLE:
        {
            // end of function
            // is there a jump to an instruction after this one?
            uint32 hleFuncId = opcode & 0xFFFF;
            if (hleFuncId >= 0x1000 && hleFuncId < 0x4000)
            {
                if (GamePatch_IsNonReturnFunction(hleFuncId - 0x1000) == false)
                {
                    return true;
                }
            }
            return false;
        }
        case Espresso::PrimaryOpcode::BC:
        {
            uint32 BD, BI;
            Espresso::BOField BO;
            bool AA, LK;
            Espresso::decodeOp_BC(opcode, BD, BO, BI, AA, LK);
            uint32 branchTarget = AA ? BD : BD + address;
            if (!LK)
                addBranchDestination(range, branchTarget);
            break;
        }
        case Espresso::PrimaryOpcode::B:
        {
            uint32 LI;
            bool AA, LK;
            Espresso::decodeOp_B(opcode, LI, AA, LK);
            uint32 branchTarget = AA ? LI : LI + address;
            if (!LK)
            {
                addBranchDestination(range, branchTarget);
                // if the next two or previous two instructions are branch instructions, we assume that they are destinations of a jump table
                // todo - can we make this more reliable by checking for BCTR or similar instructions first?
                // example: The Swapper 0x01B1FC04
                if ((PPCRecompilerCalcFuncSize_isUnconditionalBranchInstruction(memory_readU32(address + 4)) && PPCRecompilerCalcFuncSize_isUnconditionalBranchInstruction(memory_readU32(address + 8))) ||
                    (PPCRecompilerCalcFuncSize_isUnconditionalBranchInstruction(memory_readU32(address - 8)) && PPCRecompilerCalcFuncSize_isUnconditionalBranchInstruction(memory_readU32(address - 4))))
                {
                    return true;
                }
                return false; // current flow ends at unconditional branch instruction
            }
            break;
        }
        case Espresso::PrimaryOpcode::GROUP_19:
            switch (Espresso::GetGroup19Opcode(opcode))
            {
            case Espresso::Opcode19::BCLR:
            {
                Espresso::BOField BO;
                uint32 BI;
                bool LK;
                Espresso::decodeOp_BCLR(opcode, BO, BI, LK);
                if (BO.branchAlways() && !LK)
                {
                    // unconditional BLR
                    return false;
                }
                break;
            }
            case Espresso::Opcode19::BCCTR:
                if (opcode == 0x4E800420)
                {
                    // unconditional BCTR
                    // this instruction is often used for switch statements, therefore we should be wary of ending the function here
                    // it's better to overestimate function size than to predict sizes that are too short

                    // currently we only end the function if the BCTR is followed by a NOP (alignment) or an invalid instruction
                    // todo: improve robustness, find better ways to handle false positives
                    uint32 nextOpcode = memory_readU32(address + 4);

                    if (nextOpcode == 0x60000000 || PPCRecompilerCalcFuncSize_isValidInstruction(nextOpcode) == false)
                    {
                        return false;
                    }
                    return true;
                }
                // conditional BCTR
                return true;
            default:
                break;
            }
            break;
        default:
            break;
        }
        return true;
    }

    void checkForCollisions()
    {
#ifndef PUBLIC_RELEASE
        uint32 endOfPrevious = 0;
        for (auto itr : map_ranges)
        {
            if (endOfPrevious > itr->startAddress)
            {
                cemu_assert_debug(false);
            }
            endOfPrevious = itr->startAddress + itr->length;
        }
#endif
    }

    // nextRange must point to the closest range after startAddress, or NULL if there is none
    void processRange(MPTR startAddress, PPCRange_t* previousRange, PPCRange_t* nextRange)
    {
        checkForCollisions();
        cemu_assert_debug(previousRange == nullptr || (startAddress == (previousRange->startAddress + previousRange->length)));
        PPCRange_t* newRange;
        if (previousRange && (previousRange->startAddress + previousRange->length) == startAddress)
        {
            newRange = previousRange;
        }
        else
        {
            cemu_assert_debug(previousRange == nullptr);
            newRange = new PPCRange_t(startAddress);
            map_ranges.emplace(newRange);
        }
        // process instruction flow until it is interrupted by an unconditional branch
        MPTR currentAddress = startAddress;
        MPTR endAddress = 0xFFFFFFFF;
        if (nextRange)
            endAddress = nextRange->startAddress;
        while (currentAddress < endAddress)
        {
            if (!processInstruction(newRange, currentAddress))
            {
                currentAddress += 4;
                break;
            }
            currentAddress += 4;
        }
        newRange->length = currentAddress - newRange->startAddress;

        if (nextRange && currentAddress >= nextRange->startAddress)
        {
            // merge with next range
            newRange->length = (nextRange->startAddress + nextRange->length) - newRange->startAddress;
            map_ranges.erase(nextRange);
            delete nextRange;
            checkForCollisions();
            return;
        }
        checkForCollisions();
    }

    // find first unvisited branch target and start a new range there
    // returns true if the method should be called again
    bool processBranchTargetsSinglePass()
    {
        cemu_assert_debug(!map_ranges.empty());
        auto rangeItr = map_ranges.begin();

        PPCRange_t* previousRange = nullptr;
        for (std::set<uint32_t>::const_iterator targetItr = map_branchTargets.begin(); targetItr != map_branchTargets.end(); )
        {
            while (rangeItr != map_ranges.end() && ((*rangeItr)->startAddress + (*rangeItr)->length) <= (*targetItr))
            {
                previousRange = *rangeItr;
                rangeItr++;
                if (rangeItr == map_ranges.end())
                {
                    // last range reached
                    if ((previousRange->startAddress + previousRange->length) == *targetItr)
                        processRange(*targetItr, previousRange, nullptr);
                    else
                        processRange(*targetItr, nullptr, nullptr);
                    return true;
                }
            }

            if ((*targetItr) >= (*rangeItr)->startAddress &&
                (*targetItr) < ((*rangeItr)->startAddress + (*rangeItr)->length))
            {
                // delete visited targets
                targetItr = map_branchTargets.erase(targetItr);
                continue;
            }

            cemu_assert_debug((*rangeItr)->startAddress > (*targetItr));
            if (previousRange && (previousRange->startAddress + previousRange->length) == *targetItr)
                processRange(*targetItr, previousRange, *rangeItr); // extend previousRange
            else
                processRange(*targetItr, nullptr, *rangeItr);
            return true;
        }
        return false;
    }

    void processBranchTargets()
    {
        while (processBranchTargetsSinglePass());
    }
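    // note: each pass of processBranchTargetsSinglePass() either erases a branch target that is
    // already covered by a range or creates/extends a range; ranges only grow, so the loop above
    // reaches a fixed point and terminates once every recorded target lies inside some range.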

private:
    bool PPCRecompilerCalcFuncSize_isUnconditionalBranchInstruction(uint32 opcode)
    {
        if (Espresso::GetPrimaryOpcode(opcode) == Espresso::PrimaryOpcode::B)
        {
            uint32 LI;
            bool AA, LK;
            Espresso::decodeOp_B(opcode, LI, AA, LK);
            if (!LK)
                return true;
        }
        return false;
    }

    bool PPCRecompilerCalcFuncSize_isValidInstruction(uint32 opcode)
    {
        if ((opcode >> 26) == 0)
            return false;
        return true;
    }

private:
    struct RangePtrCmp
    {
        bool operator()(const PPCRange_t* lhs, const PPCRange_t* rhs) const
        {
            return lhs->startAddress < rhs->startAddress;
        }
    };

    std::set<PPCRange_t*, RangePtrCmp> map_ranges;
    std::set<uint32> map_branchTargets;
};
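
The two public methods above are designed to be driven together: trackStartPoint() explores the instruction flow plus all recorded branch targets, after which getRangeForAddress() answers containment queries. A minimal usage sketch (it mirrors PPCRecompiler_recompileAtAddress in PPCRecompiler.cpp further down; 'address' is assumed to be a valid function entry point):

    PPCFunctionBoundaryTracker funcBoundaries;
    funcBoundaries.trackStartPoint(address); // explore flow and branch targets
    PPCFunctionBoundaryTracker::PPCRange_t range;
    if (funcBoundaries.getRangeForAddress(address, range))
        cemu_assert_debug(range.getEndAddress() > address); // found range covers the entry point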

src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp (new file, 593 lines)
@@ -0,0 +1,593 @@
#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
|
||||
#include "PPCFunctionBoundaryTracker.h"
|
||||
#include "PPCRecompiler.h"
|
||||
#include "PPCRecompilerIml.h"
|
||||
#include "PPCRecompilerX64.h"
|
||||
#include "Cafe/OS/RPL/rpl.h"
|
||||
#include "util/containers/RangeStore.h"
|
||||
#include "Cafe/OS/libs/coreinit/coreinit_CodeGen.h"
|
||||
#include "config/ActiveSettings.h"
|
||||
#include "config/LaunchSettings.h"
|
||||
|
||||
#include "util/helpers/fspinlock.h"
|
||||
#include "Common/ExceptionHandler/ExceptionHandler.h"
|
||||
#include "util/helpers/helpers.h"
|
||||
|
||||
#include "util/MemMapper/MemMapper.h"
|
||||
|
||||
struct PPCInvalidationRange
|
||||
{
|
||||
MPTR startAddress;
|
||||
uint32 size;
|
||||
|
||||
PPCInvalidationRange(MPTR _startAddress, uint32 _size) : startAddress(_startAddress), size(_size) {};
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
FSpinlock recompilerSpinlock;
|
||||
std::queue<MPTR> targetQueue;
|
||||
std::vector<PPCInvalidationRange> invalidationRanges;
|
||||
}PPCRecompilerState;
|
||||
|
||||
RangeStore<PPCRecFunction_t*, uint32, 7703, 0x2000> rangeStore_ppcRanges;
|
||||
|
||||
void ATTR_MS_ABI (*PPCRecompiler_enterRecompilerCode)(uint64 codeMem, uint64 ppcInterpreterInstance);
|
||||
void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_visited)();
|
||||
void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_unvisited)();
|
||||
|
||||
PPCRecompilerInstanceData_t* ppcRecompilerInstanceData;
|
||||
|
||||
bool ppcRecompilerEnabled = false;
|
||||
|
||||
// this function does never block and can fail if the recompiler lock cannot be acquired immediately
|
||||
void PPCRecompiler_visitAddressNoBlock(uint32 enterAddress)
|
||||
{
|
||||
// quick read-only check without lock
|
||||
if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] != PPCRecompiler_leaveRecompilerCode_unvisited)
|
||||
return;
|
||||
// try to acquire lock
|
||||
if (!PPCRecompilerState.recompilerSpinlock.tryAcquire())
|
||||
return;
|
||||
auto funcPtr = ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4];
|
||||
if (funcPtr != PPCRecompiler_leaveRecompilerCode_unvisited)
|
||||
{
|
||||
// was visited since previous check
|
||||
PPCRecompilerState.recompilerSpinlock.release();
|
||||
return;
|
||||
}
|
||||
// add to recompilation queue and flag as visited
|
||||
PPCRecompilerState.targetQueue.emplace(enterAddress);
|
||||
ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] = PPCRecompiler_leaveRecompilerCode_visited;
|
||||
|
||||
PPCRecompilerState.recompilerSpinlock.release();
|
||||
}
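
// note: PPCRecompiler_visitAddressNoBlock above uses a double-checked pattern; the unlocked
// first read is a benign race because the jump table entry is re-read after tryAcquire()
// succeeds, so a stale first read only costs a redundant lock attempt.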

void PPCRecompiler_recompileIfUnvisited(uint32 enterAddress)
{
    if (ppcRecompilerEnabled == false)
        return;
    PPCRecompiler_visitAddressNoBlock(enterAddress);
}

void PPCRecompiler_enter(PPCInterpreter_t* hCPU, PPCREC_JUMP_ENTRY funcPtr)
{
#if BOOST_OS_WINDOWS > 0
    uint32 prevState = _controlfp(0, 0);
    _controlfp(_RC_NEAR, _MCW_RC);
    PPCRecompiler_enterRecompilerCode((uint64)funcPtr, (uint64)hCPU);
    _controlfp(prevState, _MCW_RC);
    // debug recompiler exit - useful to find frequently executed functions which couldn't be recompiled
#ifndef PUBLIC_RELEASE
    if (hCPU->remainingCycles > 0 && GetAsyncKeyState(VK_F4))
    {
        auto t = std::chrono::high_resolution_clock::now();
        auto dur = std::chrono::duration_cast<std::chrono::microseconds>(t.time_since_epoch()).count();
        forceLog_printf("Recompiler exit: 0x%08x LR: 0x%08x Timestamp %lld.%04lld", hCPU->instructionPointer, hCPU->spr.LR, dur / 1000LL, (dur % 1000LL));
    }
#endif
#else
    PPCRecompiler_enterRecompilerCode((uint64)funcPtr, (uint64)hCPU);
#endif
    // after leaving recompiler prematurely attempt to recompile the code at the new location
    if (hCPU->remainingCycles > 0)
    {
        PPCRecompiler_visitAddressNoBlock(hCPU->instructionPointer);
    }
}
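
// note: the _controlfp(_RC_NEAR, _MCW_RC) pair in PPCRecompiler_enter above forces the host FPU
// rounding mode to round-to-nearest while inside recompiled code (matching the PowerPC default
// rounding mode) and restores the previous host state on exit.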

void PPCRecompiler_attemptEnterWithoutRecompile(PPCInterpreter_t* hCPU, uint32 enterAddress)
{
    cemu_assert_debug(hCPU->instructionPointer == enterAddress);
    if (ppcRecompilerEnabled == false)
        return;
    auto funcPtr = ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4];
    if (funcPtr != PPCRecompiler_leaveRecompilerCode_unvisited && funcPtr != PPCRecompiler_leaveRecompilerCode_visited)
    {
        cemu_assert_debug(ppcRecompilerInstanceData != nullptr);
        PPCRecompiler_enter(hCPU, funcPtr);
    }
}

void PPCRecompiler_attemptEnter(PPCInterpreter_t* hCPU, uint32 enterAddress)
{
    cemu_assert_debug(hCPU->instructionPointer == enterAddress);
    if (ppcRecompilerEnabled == false)
        return;
    if (hCPU->remainingCycles <= 0)
        return;
    auto funcPtr = ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4];
    if (funcPtr == PPCRecompiler_leaveRecompilerCode_unvisited)
    {
        PPCRecompiler_visitAddressNoBlock(enterAddress);
    }
    else if (funcPtr != PPCRecompiler_leaveRecompilerCode_visited)
    {
        // enter
        cemu_assert_debug(ppcRecompilerInstanceData != nullptr);
        PPCRecompiler_enter(hCPU, funcPtr);
    }
}

PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set<uint32>& entryAddresses, std::vector<std::pair<MPTR, uint32>>& entryPointsOut)
{
    if (range.startAddress >= PPC_REC_CODE_AREA_END)
    {
        cemuLog_force("Attempting to recompile function outside of allowed code area");
        return nullptr;
    }

    uint32 codeGenRangeStart;
    uint32 codeGenRangeSize = 0;
    coreinit::OSGetCodegenVirtAddrRangeInternal(codeGenRangeStart, codeGenRangeSize);
    if (codeGenRangeSize != 0)
    {
        if (range.startAddress >= codeGenRangeStart && range.startAddress < (codeGenRangeStart + codeGenRangeSize))
        {
            if (coreinit::codeGenShouldAvoid())
            {
                return nullptr;
            }
        }
    }

    PPCRecFunction_t* ppcRecFunc = new PPCRecFunction_t();
    ppcRecFunc->ppcAddress = range.startAddress;
    ppcRecFunc->ppcSize = range.length;
    // generate intermediate code
    ppcImlGenContext_t ppcImlGenContext = { 0 };
    bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses);
    if (compiledSuccessfully == false)
    {
        // todo: Free everything
        PPCRecompiler_freeContext(&ppcImlGenContext);
        delete ppcRecFunc;
        return nullptr;
    }
    // emit x64 code
    bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext);
    if (x64GenerationSuccess == false)
    {
        PPCRecompiler_freeContext(&ppcImlGenContext);
        delete ppcRecFunc; // also free the partially constructed function object
        return nullptr;
    }

    // collect list of PPC-->x64 entry points
    entryPointsOut.clear();
    for (sint32 s = 0; s < ppcImlGenContext.segmentListCount; s++)
    {
        PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s];
        if (imlSegment->isEnterable == false)
            continue;

        uint32 ppcEnterOffset = imlSegment->enterPPCAddress;
        uint32 x64Offset = imlSegment->x64Offset;

        entryPointsOut.emplace_back(ppcEnterOffset, x64Offset);
    }

    PPCRecompiler_freeContext(&ppcImlGenContext);
    return ppcRecFunc;
}

bool PPCRecompiler_makeRecompiledFunctionActive(uint32 initialEntryPoint, PPCFunctionBoundaryTracker::PPCRange_t& range, PPCRecFunction_t* ppcRecFunc, std::vector<std::pair<MPTR, uint32>>& entryPoints)
{
    // update jump table
    PPCRecompilerState.recompilerSpinlock.acquire();

    // check if the initial entrypoint is still flagged for recompilation
    // it's possible that the range was invalidated during the time it took to translate the function
    if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[initialEntryPoint / 4] != PPCRecompiler_leaveRecompilerCode_visited)
    {
        PPCRecompilerState.recompilerSpinlock.release();
        return false;
    }

    // check if the current range got invalidated in the time it took to recompile it
    bool isInvalidated = false;
    for (auto& invRange : PPCRecompilerState.invalidationRanges)
    {
        MPTR rStartAddr = invRange.startAddress;
        MPTR rEndAddr = rStartAddr + invRange.size;
        for (auto& recFuncRange : ppcRecFunc->list_ranges)
        {
            if (recFuncRange.ppcAddress < (rEndAddr) && (recFuncRange.ppcAddress + recFuncRange.ppcSize) >= rStartAddr)
            {
                isInvalidated = true;
                break;
            }
        }
    }
    PPCRecompilerState.invalidationRanges.clear();
    if (isInvalidated)
    {
        PPCRecompilerState.recompilerSpinlock.release();
        return false;
    }

    // update jump table
    for (auto& itr : entryPoints)
    {
        ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[itr.first / 4] = (PPCREC_JUMP_ENTRY)((uint8*)ppcRecFunc->x86Code + itr.second);
    }

    // due to inlining, some entrypoints can get optimized away
    // therefore we reset all addresses that are still marked as visited (but not recompiled)
    // we don't remove the points from the queue, but any address that's not marked as visited won't get recompiled
    // if they are reachable, the interpreter will queue them again
    for (uint32 v = range.startAddress; v <= (range.startAddress + range.length); v += 4)
    {
        auto funcPtr = ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[v / 4];
        if (funcPtr == PPCRecompiler_leaveRecompilerCode_visited)
            ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[v / 4] = PPCRecompiler_leaveRecompilerCode_unvisited;
    }

    // register ranges
    for (auto& r : ppcRecFunc->list_ranges)
    {
        r.storedRange = rangeStore_ppcRanges.storeRange(ppcRecFunc, r.ppcAddress, r.ppcAddress + r.ppcSize);
    }
    PPCRecompilerState.recompilerSpinlock.release();

    return true;
}

void PPCRecompiler_recompileAtAddress(uint32 address)
{
    cemu_assert_debug(ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[address / 4] == PPCRecompiler_leaveRecompilerCode_visited);

    // get size
    PPCFunctionBoundaryTracker funcBoundaries;
    funcBoundaries.trackStartPoint(address);
    // get range that encompasses address
    PPCFunctionBoundaryTracker::PPCRange_t range;
    if (funcBoundaries.getRangeForAddress(address, range) == false)
    {
        cemu_assert_debug(false);
    }

    // todo - use info from previously compiled ranges to determine full size of this function (and merge all the entryAddresses)

    // collect all currently known entry points for this range
    PPCRecompilerState.recompilerSpinlock.acquire();

    std::set<uint32> entryAddresses;

    entryAddresses.emplace(address);

    PPCRecompilerState.recompilerSpinlock.release();

    std::vector<std::pair<MPTR, uint32>> functionEntryPoints;
    auto func = PPCRecompiler_recompileFunction(range, entryAddresses, functionEntryPoints);

    if (!func)
    {
        return; // recompilation failed
    }
    bool r = PPCRecompiler_makeRecompiledFunctionActive(address, range, func, functionEntryPoints);
}

void PPCRecompiler_thread()
{
    SetThreadName("PPCRecompiler_thread");
    while (true)
    {
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
        // asynchronous recompilation:
        // 1) take address from queue
        // 2) check if address is still marked as visited
        // 3) if yes -> calculate size, gather all entry points, recompile and update jump table
        while (true)
        {
            PPCRecompilerState.recompilerSpinlock.acquire();
            if (PPCRecompilerState.targetQueue.empty())
            {
                PPCRecompilerState.recompilerSpinlock.release();
                break;
            }
            auto enterAddress = PPCRecompilerState.targetQueue.front();
            PPCRecompilerState.targetQueue.pop();

            auto funcPtr = ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4];
            if (funcPtr != PPCRecompiler_leaveRecompilerCode_visited)
            {
                // only recompile functions if marked as visited
                PPCRecompilerState.recompilerSpinlock.release();
                continue;
            }
            PPCRecompilerState.recompilerSpinlock.release();

            PPCRecompiler_recompileAtAddress(enterAddress);
        }
    }
}

#define PPC_REC_ALLOC_BLOCK_SIZE (4*1024*1024) // 4MB

std::bitset<(MEMORY_CODEAREA_ADDR + MEMORY_CODEAREA_SIZE) / PPC_REC_ALLOC_BLOCK_SIZE> ppcRecompiler_reservedBlockMask;

void PPCRecompiler_reserveLookupTableBlock(uint32 offset)
{
    uint32 blockIndex = offset / PPC_REC_ALLOC_BLOCK_SIZE;
    offset = blockIndex * PPC_REC_ALLOC_BLOCK_SIZE;

    if (ppcRecompiler_reservedBlockMask[blockIndex])
        return;
    ppcRecompiler_reservedBlockMask[blockIndex] = true;

    void* p1 = MemMapper::AllocateMemory(&(ppcRecompilerInstanceData->ppcRecompilerFuncTable[offset/4]), (PPC_REC_ALLOC_BLOCK_SIZE/4)*sizeof(void*), MemMapper::PAGE_PERMISSION::P_RW, true);
    void* p3 = MemMapper::AllocateMemory(&(ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[offset/4]), (PPC_REC_ALLOC_BLOCK_SIZE/4)*sizeof(void*), MemMapper::PAGE_PERMISSION::P_RW, true);
    if (!p1 || !p3)
    {
        forceLog_printf("Failed to allocate memory for recompiler (0x%08x)", offset);
        cemu_assert(false);
        return;
    }
    for (uint32 i = 0; i < PPC_REC_ALLOC_BLOCK_SIZE/4; i++)
    {
        ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[offset/4+i] = PPCRecompiler_leaveRecompilerCode_unvisited;
    }
}

void PPCRecompiler_allocateRange(uint32 startAddress, uint32 size)
{
    if (ppcRecompilerInstanceData == nullptr)
        return;
    uint32 endAddress = (startAddress + size + PPC_REC_ALLOC_BLOCK_SIZE - 1) & ~(PPC_REC_ALLOC_BLOCK_SIZE-1);
    startAddress = (startAddress) & ~(PPC_REC_ALLOC_BLOCK_SIZE-1);
    startAddress = std::min(startAddress, (uint32)MEMORY_CODEAREA_ADDR + MEMORY_CODEAREA_SIZE);
    endAddress = std::min(endAddress, (uint32)MEMORY_CODEAREA_ADDR + MEMORY_CODEAREA_SIZE);
    for (uint32 i = startAddress; i < endAddress; i += PPC_REC_ALLOC_BLOCK_SIZE)
    {
        PPCRecompiler_reserveLookupTableBlock(i);
    }
}
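
// worked example for PPCRecompiler_allocateRange above (4MB blocks):
// PPCRecompiler_allocateRange(0x00512345, 0x1000) rounds down to startAddress = 0x00400000 and
// up to endAddress = 0x00800000, so exactly one lookup-table block (index 1, covering
// 0x00400000-0x007FFFFF) gets reserved.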

struct ppcRecompilerFuncRange_t
{
    MPTR ppcStart;
    uint32 ppcSize;
    void* x86Start;
    size_t x86Size;
};

DLLEXPORT bool PPCRecompiler_findFuncRanges(uint32 addr, ppcRecompilerFuncRange_t* rangesOut, size_t* countInOut)
{
    PPCRecompilerState.recompilerSpinlock.acquire();
    size_t countIn = *countInOut;
    size_t countOut = 0;

    rangeStore_ppcRanges.findRanges(addr, addr + 4, [rangesOut, countIn, &countOut](uint32 start, uint32 end, PPCRecFunction_t* func)
    {
        if (countOut < countIn)
        {
            rangesOut[countOut].ppcStart = start;
            rangesOut[countOut].ppcSize = (end-start);
            rangesOut[countOut].x86Start = func->x86Code;
            rangesOut[countOut].x86Size = func->x86Size;
        }
        countOut++;
    }
    );
    PPCRecompilerState.recompilerSpinlock.release();
    *countInOut = countOut;
    if (countOut > countIn)
        return false;
    return true;
}
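
// usage sketch for the countInOut protocol of PPCRecompiler_findFuncRanges above
// (the capacity of 8 is an arbitrary caller-side assumption):
//     ppcRecompilerFuncRange_t ranges[8];
//     size_t count = 8; // in: buffer capacity
//     if (!PPCRecompiler_findFuncRanges(addr, ranges, &count))
//         ; // buffer too small; count now holds the number of ranges required
//     // on success, count holds the number of entries actually written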

DLLEXPORT uintptr_t* PPCRecompiler_getJumpTableBase()
{
    if (ppcRecompilerInstanceData == nullptr)
        return nullptr;
    return (uintptr_t*)ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable;
}

void PPCRecompiler_invalidateTableRange(uint32 offset, uint32 size)
{
    if (ppcRecompilerInstanceData == nullptr)
        return;
    for (uint32 i = 0; i < size / 4; i++)
    {
        ppcRecompilerInstanceData->ppcRecompilerFuncTable[offset / 4 + i] = nullptr;
        ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[offset / 4 + i] = PPCRecompiler_leaveRecompilerCode_unvisited;
    }
}

void PPCRecompiler_deleteFunction(PPCRecFunction_t* func)
{
    // assumes PPCRecompilerState.recompilerSpinlock is already held
    cemu_assert_debug(PPCRecompilerState.recompilerSpinlock.isHolding());
    for (auto& r : func->list_ranges)
    {
        PPCRecompiler_invalidateTableRange(r.ppcAddress, r.ppcSize);
        if (r.storedRange)
            rangeStore_ppcRanges.deleteRange(r.storedRange);
        r.storedRange = nullptr;
    }
    // todo - free x86 code
}

DLLEXPORT void PPCRecompiler_invalidateRange(uint32 startAddr, uint32 endAddr)
{
    if (ppcRecompilerEnabled == false)
        return;
    if (startAddr >= PPC_REC_CODE_AREA_SIZE)
        return;
    cemu_assert_debug(endAddr >= startAddr);

    PPCRecompilerState.recompilerSpinlock.acquire();

    uint32 rStart;
    uint32 rEnd;
    PPCRecFunction_t* rFunc;

    // mark range as unvisited
    for (uint64 currentAddr = (uint64)startAddr&~3; currentAddr < (uint64)(endAddr&~3); currentAddr += 4)
        ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[currentAddr / 4] = PPCRecompiler_leaveRecompilerCode_unvisited;

    // add entry to invalidation queue
    PPCRecompilerState.invalidationRanges.emplace_back(startAddr, endAddr-startAddr);

    while (rangeStore_ppcRanges.findFirstRange(startAddr, endAddr, rStart, rEnd, rFunc))
    {
        PPCRecompiler_deleteFunction(rFunc);
    }

    PPCRecompilerState.recompilerSpinlock.release();
}

void PPCRecompiler_init()
{
    if (ActiveSettings::GetCPUMode() == CPUMode::SinglecoreInterpreter)
    {
        ppcRecompilerEnabled = false;
        return;
    }
    if (LaunchSettings::ForceInterpreter())
    {
        cemuLog_log(LogType::Force, "Recompiler disabled. Command line --force-interpreter was passed");
        return;
    }
    if (ppcRecompilerInstanceData)
    {
        MemMapper::FreeReservation(ppcRecompilerInstanceData, sizeof(PPCRecompilerInstanceData_t));
        ppcRecompilerInstanceData = nullptr;
    }
    debug_printf("Allocating %dMB for recompiler instance data...\n", (sint32)(sizeof(PPCRecompilerInstanceData_t) / 1024 / 1024));
    ppcRecompilerInstanceData = (PPCRecompilerInstanceData_t*)MemMapper::ReserveMemory(nullptr, sizeof(PPCRecompilerInstanceData_t), MemMapper::PAGE_PERMISSION::P_RW);
    MemMapper::AllocateMemory(&(ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom), sizeof(PPCRecompilerInstanceData_t) - offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom), MemMapper::PAGE_PERMISSION::P_RW, true);
    PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions();

    uint32 codeRegionEnd = RPLLoader_GetMaxCodeOffset();
    codeRegionEnd = (codeRegionEnd + PPC_REC_ALLOC_BLOCK_SIZE - 1) & ~(PPC_REC_ALLOC_BLOCK_SIZE - 1);

    uint32 codeRegionSize = codeRegionEnd - PPC_REC_CODE_AREA_START;
    forceLogDebug_printf("Allocating recompiler tables for range 0x%08x-0x%08x", PPC_REC_CODE_AREA_START, codeRegionEnd);

    for (uint32 i = 0; i < codeRegionSize; i += PPC_REC_ALLOC_BLOCK_SIZE)
    {
        PPCRecompiler_reserveLookupTableBlock(i);
    }

    // init x64 recompiler instance data
    ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[0] = 1ULL << 63ULL;
    ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[1] = 0ULL;
    ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[0] = 1ULL << 63ULL;
    ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[1] = 1ULL << 63ULL;
    ppcRecompilerInstanceData->_x64XMM_xorNOTMask[0] = 0xFFFFFFFFFFFFFFFFULL;
    ppcRecompilerInstanceData->_x64XMM_xorNOTMask[1] = 0xFFFFFFFFFFFFFFFFULL;
    ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[0] = ~(1ULL << 63ULL);
    ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[1] = ~0ULL;
    ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[0] = ~(1ULL << 63ULL);
    ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[1] = ~(1ULL << 63ULL);
    ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[0] = ~(1 << 31);
    ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[1] = 0xFFFFFFFF;
    ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[2] = 0xFFFFFFFF;
    ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[3] = 0xFFFFFFFF;
    ppcRecompilerInstanceData->_x64XMM_singleWordMask[0] = 0xFFFFFFFFULL;
    ppcRecompilerInstanceData->_x64XMM_singleWordMask[1] = 0ULL;
    ppcRecompilerInstanceData->_x64XMM_constDouble1_1[0] = 1.0;
    ppcRecompilerInstanceData->_x64XMM_constDouble1_1[1] = 1.0;
    ppcRecompilerInstanceData->_x64XMM_constDouble0_0[0] = 0.0;
    ppcRecompilerInstanceData->_x64XMM_constDouble0_0[1] = 0.0;
    ppcRecompilerInstanceData->_x64XMM_constFloat0_0[0] = 0.0f;
    ppcRecompilerInstanceData->_x64XMM_constFloat0_0[1] = 0.0f;
    ppcRecompilerInstanceData->_x64XMM_constFloat1_1[0] = 1.0f;
    ppcRecompilerInstanceData->_x64XMM_constFloat1_1[1] = 1.0f;
    *(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[0] = 0x00800000;
    *(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[1] = 0x00800000;
    ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[0] = 0x7F800000;
    ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[1] = 0x7F800000;
    ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[2] = 0x7F800000;
    ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[3] = 0x7F800000;
    ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[0] = ~0x80000000;
    ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[1] = ~0x80000000;
    ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[2] = ~0x80000000;
    ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[3] = ~0x80000000;

    // setup GQR scale tables

    for (uint32 i = 0; i < 32; i++)
    {
        float a = 1.0f / (float)(1u << i);
        float b = 0;
        if (i == 0)
            b = 4294967296.0f;
        else
            b = (float)(1u << (32u - i));

        float ar = (float)(1u << i);
        float br = 0;
        if (i == 0)
            br = 1.0f / 4294967296.0f;
        else
            br = 1.0f / (float)(1u << (32u - i));

        ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[i * 2 + 0] = a;
        ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[i * 2 + 1] = 1.0f;
        ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[(i + 32) * 2 + 0] = b;
        ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[(i + 32) * 2 + 1] = 1.0f;

        ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[i * 2 + 0] = a;
        ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[i * 2 + 1] = a;
        ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[(i + 32) * 2 + 0] = b;
        ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[(i + 32) * 2 + 1] = b;

        ppcRecompilerInstanceData->_psq_st_scale_ps0_1[i * 2 + 0] = ar;
        ppcRecompilerInstanceData->_psq_st_scale_ps0_1[i * 2 + 1] = 1.0f;
        ppcRecompilerInstanceData->_psq_st_scale_ps0_1[(i + 32) * 2 + 0] = br;
        ppcRecompilerInstanceData->_psq_st_scale_ps0_1[(i + 32) * 2 + 1] = 1.0f;

        ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[i * 2 + 0] = ar;
        ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[i * 2 + 1] = ar;
        ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[(i + 32) * 2 + 0] = br;
        ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[(i + 32) * 2 + 1] = br;
    }
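    // worked example for the tables above: GQR scale i=1 gives a load scale a = 1/2 = 0.5 and a
    // store scale ar = 2.0; the i==0 entries in the second table half are special-cased to 2^32
    // (4294967296.0f) because evaluating 1u << 32 would be undefined behavior.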

    // mxcsr
    ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOn = 0x1F80 | 0x8000;
    ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOff = 0x1F80;

    // query processor extensions
    int cpuInfo[4];
    __cpuid(cpuInfo, 0x80000001);
    hasLZCNTSupport = ((cpuInfo[2] >> 5) & 1) != 0;
    __cpuid(cpuInfo, 0x1);
    hasMOVBESupport = ((cpuInfo[2] >> 22) & 1) != 0;
    hasAVXSupport = ((cpuInfo[2] >> 28) & 1) != 0;
    __cpuidex(cpuInfo, 0x7, 0);
    hasBMI2Support = ((cpuInfo[1] >> 8) & 1) != 0;
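    // feature bit reference: LZCNT = CPUID.80000001h:ECX[5] (ABM), MOVBE = CPUID.01h:ECX[22],
    // AVX = CPUID.01h:ECX[28], BMI2 = CPUID.07h(sub-leaf 0):EBX[8]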

    forceLog_printf("Recompiler initialized. CPU extensions: %s%s%s", hasLZCNTSupport ? "LZCNT " : "", hasMOVBESupport ? "MOVBE " : "", hasAVXSupport ? "AVX " : "");

    ppcRecompilerEnabled = true;

    // launch recompilation thread
    std::thread t_recompiler(PPCRecompiler_thread);
    t_recompiler.detach();
}

src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h (new file, 399 lines)
@@ -0,0 +1,399 @@
#include <vector>

#define PPC_REC_CODE_AREA_START (0x00000000) // lower bound of executable memory area. Recompiler expects this address to be 0
#define PPC_REC_CODE_AREA_END (0x10000000) // upper bound of executable memory area
#define PPC_REC_CODE_AREA_SIZE (PPC_REC_CODE_AREA_END - PPC_REC_CODE_AREA_START)

#define PPC_REC_ALIGN_TO_4MB(__v) (((__v)+4*1024*1024-1)&~(4*1024*1024-1))

#define PPC_REC_MAX_VIRTUAL_GPR (40) // enough to store 32 GPRs + a few SPRs + temp registers (usually only 1-2)
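
// e.g. PPC_REC_ALIGN_TO_4MB(0x00123456) == 0x00400000 (rounds up to the next 4MB boundary)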

typedef struct
{
    uint32 ppcAddress;
    uint32 ppcSize;
    //void* x86Start;
    //size_t x86Size;
    void* storedRange;
}ppcRecRange_t;

typedef struct
{
    uint32 ppcAddress;
    uint32 ppcSize; // ppc code size of function
    void* x86Code; // pointer to x86 code
    size_t x86Size;
    std::vector<ppcRecRange_t> list_ranges;
}PPCRecFunction_t;

#define PPCREC_IML_OP_FLAG_SIGNEXTEND (1<<0)
#define PPCREC_IML_OP_FLAG_SWITCHENDIAN (1<<1)
#define PPCREC_IML_OP_FLAG_NOT_EXPANDED (1<<2) // set on single-precision load instructions to indicate that the value should not be rounded to double-precision
#define PPCREC_IML_OP_FLAG_UNUSED (1<<7) // used to mark instructions that are not used

typedef struct
{
    uint8 type;
    uint8 operation;
    uint8 crRegister; // set to 0xFF if not set, not all IML instruction types support cr.
    uint8 crMode; // only used when crRegister is valid, used to differentiate between various forms of condition flag set/clear behavior
    uint32 crIgnoreMask; // bit set for every respective CR bit that doesn't need to be updated
    uint32 associatedPPCAddress; // ppc address that is associated with this instruction
    union
    {
        struct
        {
            uint8 _padding[7];
        }padding;
        struct
        {
            // R (op) A [update cr* in mode *]
            uint8 registerResult;
            uint8 registerA;
        }op_r_r;
        struct
        {
            // R = A (op) B [update cr* in mode *]
            uint8 registerResult;
            uint8 registerA;
            uint8 registerB;
        }op_r_r_r;
        struct
        {
            // R = A (op) immS32 [update cr* in mode *]
            uint8 registerResult;
            uint8 registerA;
            sint32 immS32;
        }op_r_r_s32;
        struct
        {
            // R/F = NAME or NAME = R/F
            uint8 registerIndex;
            uint8 copyWidth;
            uint32 name;
            uint8 flags;
        }op_r_name;
        struct
        {
            // R (op) s32 [update cr* in mode *]
            uint8 registerIndex;
            sint32 immS32;
        }op_r_immS32;
        struct
        {
            uint32 address;
            uint8 flags;
        }op_jumpmark;
        struct
        {
            uint32 param;
            uint32 param2;
            uint16 paramU16;
        }op_macro;
        struct
        {
            uint32 jumpmarkAddress;
            bool jumpAccordingToSegment; //PPCRecImlSegment_t* destinationSegment; // if set, this replaces jumpmarkAddress
            uint8 condition; // only used when crRegisterIndex is 8 or above (update: Apparently only used to mark jumps without a condition? -> Cleanup)
            uint8 crRegisterIndex;
            uint8 crBitIndex;
            bool bitMustBeSet;
        }op_conditionalJump;
        struct
        {
            uint8 registerData;
            uint8 registerMem;
            uint8 registerMem2;
            uint8 registerGQR;
            uint8 copyWidth;
            //uint8 flags;
            struct
            {
                bool swapEndian : 1;
                bool signExtend : 1;
                bool notExpanded : 1; // for floats
            }flags2;
            uint8 mode; // transfer mode (copy width, ps0/ps1 behavior)
            sint32 immS32;
        }op_storeLoad;
        struct
        {
            struct
            {
                uint8 registerMem;
                sint32 immS32;
            }src;
            struct
            {
                uint8 registerMem;
                sint32 immS32;
            }dst;
            uint8 copyWidth;
        }op_mem2mem;
        struct
        {
            uint8 registerResult;
            uint8 registerOperand;
            uint8 flags;
        }op_fpr_r_r;
        struct
        {
            uint8 registerResult;
            uint8 registerOperandA;
            uint8 registerOperandB;
            uint8 flags;
        }op_fpr_r_r_r;
        struct
        {
            uint8 registerResult;
            uint8 registerOperandA;
            uint8 registerOperandB;
            uint8 registerOperandC;
            uint8 flags;
        }op_fpr_r_r_r_r;
        struct
        {
            uint8 registerResult;
            //uint8 flags;
        }op_fpr_r;
        struct
        {
            uint32 ppcAddress;
            uint32 x64Offset;
        }op_ppcEnter;
        struct
        {
            uint8 crD; // crBitIndex (result)
            uint8 crA; // crBitIndex
            uint8 crB; // crBitIndex
        }op_cr;
        // conditional operations (emitted if supported by target platform)
        struct
        {
            // r_s32
            uint8 registerIndex;
            sint32 immS32;
            // condition
            uint8 crRegisterIndex;
            uint8 crBitIndex;
            bool bitMustBeSet;
        }op_conditional_r_s32;
    };
}PPCRecImlInstruction_t;
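
// note: all operand forms share the union above, so every PPCRecImlInstruction_t has the same
// fixed size regardless of its type field; type/operation select which union member is valid.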

typedef struct _PPCRecImlSegment_t PPCRecImlSegment_t;

typedef struct _ppcRecompilerSegmentPoint_t
{
    sint32 index;
    PPCRecImlSegment_t* imlSegment;
    _ppcRecompilerSegmentPoint_t* next;
    _ppcRecompilerSegmentPoint_t* prev;
}ppcRecompilerSegmentPoint_t;

struct raLivenessLocation_t
{
    sint32 index;
    bool isRead;
    bool isWrite;

    raLivenessLocation_t() {};

    raLivenessLocation_t(sint32 index, bool isRead, bool isWrite)
        : index(index), isRead(isRead), isWrite(isWrite) {};
};

struct raLivenessSubrangeLink_t
{
    struct raLivenessSubrange_t* prev;
    struct raLivenessSubrange_t* next;
};

struct raLivenessSubrange_t
{
    struct raLivenessRange_t* range;
    PPCRecImlSegment_t* imlSegment;
    ppcRecompilerSegmentPoint_t start;
    ppcRecompilerSegmentPoint_t end;
    // dirty state tracking
    bool _noLoad;
    bool hasStore;
    bool hasStoreDelayed;
    // next
    raLivenessSubrange_t* subrangeBranchTaken;
    raLivenessSubrange_t* subrangeBranchNotTaken;
    // processing
    uint32 lastIterationIndex;
    // instruction locations
    std::vector<raLivenessLocation_t> list_locations;
    // linked list (subranges with same GPR virtual register)
    raLivenessSubrangeLink_t link_sameVirtualRegisterGPR;
    // linked list (all subranges for this segment)
    raLivenessSubrangeLink_t link_segmentSubrangesGPR;
};

struct raLivenessRange_t
{
    sint32 virtualRegister;
    sint32 physicalRegister;
    sint32 name;
    std::vector<raLivenessSubrange_t*> list_subranges;
};

struct PPCSegmentRegisterAllocatorInfo_t
{
    // analyzer stage
    bool isPartOfProcessedLoop{}; // used during loop detection
    sint32 lastIterationIndex{};
    // linked lists
    raLivenessSubrange_t* linkedList_allSubranges{};
    raLivenessSubrange_t* linkedList_perVirtualGPR[PPC_REC_MAX_VIRTUAL_GPR]{};
};

struct PPCRecVGPRDistances_t
{
    struct _RegArrayEntry
    {
        sint32 usageStart{};
        sint32 usageEnd{};
    }reg[PPC_REC_MAX_VIRTUAL_GPR];
    bool isProcessed[PPC_REC_MAX_VIRTUAL_GPR]{};
};

typedef struct _PPCRecImlSegment_t
{
    sint32 momentaryIndex{}; // index in segment list, generally not kept up to date except if needed (necessary for loop detection)
    sint32 startOffset{}; // offset to first instruction in iml instruction list
    sint32 count{}; // number of instructions in segment
    uint32 ppcAddress{}; // ppc address (0xFFFFFFFF if not associated with an address)
    uint32 x64Offset{}; // x64 code offset of segment start
    uint32 cycleCount{}; // number of PPC cycles required to execute this segment (roughly)
    // list of intermediate instructions in this segment
    PPCRecImlInstruction_t* imlList{};
    sint32 imlListSize{};
    sint32 imlListCount{};
    // segment link
    _PPCRecImlSegment_t* nextSegmentBranchNotTaken{}; // this is also the default for segments where there is no branch
    _PPCRecImlSegment_t* nextSegmentBranchTaken{};
    bool nextSegmentIsUncertain{};
    sint32 loopDepth{};
    //sList_t* list_prevSegments;
    std::vector<_PPCRecImlSegment_t*> list_prevSegments{};
    // PPC range of segment
    uint32 ppcAddrMin{};
    uint32 ppcAddrMax{};
    // enterable segments
    bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary)
    uint32 enterPPCAddress{}; // used if isEnterable is true
    // jump destination segments
    bool isJumpDestination{}; // segment is a destination for one or more (conditional) jumps
    uint32 jumpDestinationPPCAddress{};
    // PPC FPR use mask
    bool ppcFPRUsed[32]{}; // same as ppcGPRUsed, but for FPR
    // CR use mask
    uint32 crBitsInput{}; // bits that are expected to be set from the previous segment (read in this segment but not overwritten)
    uint32 crBitsRead{}; // all bits that are read in this segment
    uint32 crBitsWritten{}; // bits that are written in this segment
    // register allocator info
    PPCSegmentRegisterAllocatorInfo_t raInfo{};
    PPCRecVGPRDistances_t raDistances{};
    bool raRangeExtendProcessed{};
    // segment points
    ppcRecompilerSegmentPoint_t* segmentPointList{};
}PPCRecImlSegment_t;

struct ppcImlGenContext_t
{
    PPCRecFunction_t* functionRef;
    uint32* currentInstruction;
    uint32 ppcAddressOfCurrentInstruction;
    // fpr mode
    bool LSQE{ true };
    bool PSE{ true };
    // cycle counter
    uint32 cyclesSinceLastBranch; // used to track ppc cycles
    // temporary general purpose registers
    uint32 mappedRegister[PPC_REC_MAX_VIRTUAL_GPR];
    // temporary floating point registers (single and double precision)
    uint32 mappedFPRRegister[256];
    // list of intermediate instructions
    PPCRecImlInstruction_t* imlList;
    sint32 imlListSize;
    sint32 imlListCount;
    // list of segments
    PPCRecImlSegment_t** segmentList;
    sint32 segmentListSize;
    sint32 segmentListCount;
    // code generation control
    bool hasFPUInstruction; // if true, PPCEnter macro will create FP_UNAVAIL checks -> Not needed in user mode
    // register allocator info
    struct
    {
        std::vector<raLivenessRange_t*> list_ranges;
    }raInfo;
    // analysis info
    struct
    {
        bool modifiesGQR[8];
    }tracking;
};

typedef void ATTR_MS_ABI (*PPCREC_JUMP_ENTRY)();

typedef struct
{
    PPCRecFunction_t* ppcRecompilerFuncTable[PPC_REC_ALIGN_TO_4MB(PPC_REC_CODE_AREA_SIZE/4)]; // one virtual-function pointer for each potential ppc instruction
    PPCREC_JUMP_ENTRY ppcRecompilerDirectJumpTable[PPC_REC_ALIGN_TO_4MB(PPC_REC_CODE_AREA_SIZE/4)]; // lookup table for ppc offset to native code function
    // x64 data
    uint64 __declspec(align(16)) _x64XMM_xorNegateMaskBottom[2];
    uint64 __declspec(align(16)) _x64XMM_xorNegateMaskPair[2];
    uint64 __declspec(align(16)) _x64XMM_xorNOTMask[2];
    uint64 __declspec(align(16)) _x64XMM_andAbsMaskBottom[2];
    uint64 __declspec(align(16)) _x64XMM_andAbsMaskPair[2];
    uint32 __declspec(align(16)) _x64XMM_andFloatAbsMaskBottom[4];
    uint64 __declspec(align(16)) _x64XMM_singleWordMask[2];
    double __declspec(align(16)) _x64XMM_constDouble1_1[2];
    double __declspec(align(16)) _x64XMM_constDouble0_0[2];
    float __declspec(align(16)) _x64XMM_constFloat0_0[2];
    float __declspec(align(16)) _x64XMM_constFloat1_1[2];
    float __declspec(align(16)) _x64XMM_constFloatMin[2];
    uint32 __declspec(align(16)) _x64XMM_flushDenormalMask1[4];
    uint32 __declspec(align(16)) _x64XMM_flushDenormalMaskResetSignBits[4];
    // PSQ load/store scale tables
    double _psq_ld_scale_ps0_ps1[64 * 2];
    double _psq_ld_scale_ps0_1[64 * 2];
    double _psq_st_scale_ps0_ps1[64 * 2];
    double _psq_st_scale_ps0_1[64 * 2];
    // MXCSR
    uint32 _x64XMM_mxCsr_ftzOn;
    uint32 _x64XMM_mxCsr_ftzOff;
}PPCRecompilerInstanceData_t;
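
// dispatch sketch: callers index ppcRecompilerDirectJumpTable by (ppcAddress / 4); the
// _unvisited/_visited stub pointers fall back to the interpreter, anything else is the
// native x64 entry point (see PPCRecompiler_attemptEnter in PPCRecompiler.cpp).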

extern __declspec(dllexport) PPCRecompilerInstanceData_t* ppcRecompilerInstanceData;
extern bool ppcRecompilerEnabled;

__declspec(dllexport) void PPCRecompiler_init();

void PPCRecompiler_allocateRange(uint32 startAddress, uint32 size);

DLLEXPORT void PPCRecompiler_invalidateRange(uint32 startAddr, uint32 endAddr);

extern void ATTR_MS_ABI (*PPCRecompiler_enterRecompilerCode)(uint64 codeMem, uint64 ppcInterpreterInstance);
extern void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_visited)();
extern void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_unvisited)();

#define PPC_REC_INVALID_FUNCTION ((PPCRecFunction_t*)-1)

// CPUID
extern __declspec(dllexport) bool hasLZCNTSupport;
extern __declspec(dllexport) bool hasMOVBESupport;
extern __declspec(dllexport) bool hasBMI2Support;
extern __declspec(dllexport) bool hasAVXSupport;

// todo - move some of the stuff above into PPCRecompilerInternal.h

// recompiler interface

void PPCRecompiler_recompileIfUnvisited(uint32 enterAddress);
void PPCRecompiler_attemptEnter(struct PPCInterpreter_t* hCPU, uint32 enterAddress);
void PPCRecompiler_attemptEnterWithoutRecompile(struct PPCInterpreter_t* hCPU, uint32 enterAddress);

src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h (new file, 422 lines)
@@ -0,0 +1,422 @@

#define PPCREC_CR_REG_TEMP 8 // there are only 8 cr registers (0-7) we use the 8th as temporary cr register that is never stored (BDNZ instruction for example)
|
||||
|
||||
enum
|
||||
{
|
||||
PPCREC_IML_OP_ASSIGN, // '=' operator
|
||||
PPCREC_IML_OP_ENDIAN_SWAP, // '=' operator with 32bit endian swap
|
||||
PPCREC_IML_OP_ADD, // '+' operator
|
||||
PPCREC_IML_OP_SUB, // '-' operator
|
||||
PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, // complex operation, result = operand + ~operand2 + carry bit, updates carry bit
|
||||
PPCREC_IML_OP_COMPARE_SIGNED, // arithmetic/signed comparison operator (updates cr)
|
||||
PPCREC_IML_OP_COMPARE_UNSIGNED, // logical/unsigned comparison operator (updates cr)
|
||||
PPCREC_IML_OP_MULTIPLY_SIGNED, // '*' operator (signed multiply)
|
||||
PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, // unsigned 64bit multiply, store only high 32bit-word of result
|
||||
PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, // signed 64bit multiply, store only high 32bit-word of result
|
||||
PPCREC_IML_OP_DIVIDE_SIGNED, // '/' operator (signed divide)
|
||||
PPCREC_IML_OP_DIVIDE_UNSIGNED, // '/' operator (unsigned divide)
|
||||
PPCREC_IML_OP_ADD_CARRY, // complex operation, result = operand + carry bit, updates carry bit
|
||||
PPCREC_IML_OP_ADD_CARRY_ME, // complex operation, result = operand + carry bit + (-1), updates carry bit
|
||||
PPCREC_IML_OP_ADD_UPDATE_CARRY, // '+' operator but also updates carry flag
|
||||
PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, // '+' operator and also adds carry, updates carry flag
|
||||
// assign operators with cast
|
||||
PPCREC_IML_OP_ASSIGN_S16_TO_S32, // copy 16bit and sign extend
|
||||
PPCREC_IML_OP_ASSIGN_S8_TO_S32, // copy 8bit and sign extend
|
||||
// binary operation
|
||||
PPCREC_IML_OP_OR, // '|' operator
|
||||
PPCREC_IML_OP_ORC, // '|' operator, second operand is complemented first
|
||||
PPCREC_IML_OP_AND, // '&' operator
|
||||
PPCREC_IML_OP_XOR, // '^' operator
|
||||
PPCREC_IML_OP_LEFT_ROTATE, // left rotate operator
|
||||
PPCREC_IML_OP_LEFT_SHIFT, // shift left operator
|
||||
PPCREC_IML_OP_RIGHT_SHIFT, // right shift operator (unsigned)
|
||||
PPCREC_IML_OP_NOT, // complement each bit
|
||||
PPCREC_IML_OP_NEG, // negate
|
||||
// ppc
|
||||
PPCREC_IML_OP_RLWIMI, // RLWIMI instruction (rotate, merge based on mask)
|
||||
PPCREC_IML_OP_SRAW, // SRAWI/SRAW instruction (algebraic shift right, sets ca flag)
|
||||
PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits)
|
||||
PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits)
|
||||
PPCREC_IML_OP_CNTLZW,
|
||||
PPCREC_IML_OP_SUBFC, // SUBFC and SUBFIC (subtract from and set carry)
|
||||
PPCREC_IML_OP_DCBZ, // clear 32 bytes aligned to 0x20
|
||||
PPCREC_IML_OP_MFCR, // copy cr to gpr
|
||||
PPCREC_IML_OP_MTCRF, // copy gpr to cr (with mask)
|
||||
// condition register
|
||||
PPCREC_IML_OP_CR_CLEAR, // clear cr bit
|
||||
PPCREC_IML_OP_CR_SET, // set cr bit
|
||||
PPCREC_IML_OP_CR_OR, // OR cr bits
|
||||
PPCREC_IML_OP_CR_ORC, // OR cr bits, complement second input operand bit first
|
||||
PPCREC_IML_OP_CR_AND, // AND cr bits
|
||||
PPCREC_IML_OP_CR_ANDC, // AND cr bits, complement second input operand bit first
|
||||
// FPU
|
||||
PPCREC_IML_OP_FPR_ADD_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_ADD_PAIR,
|
||||
PPCREC_IML_OP_FPR_SUB_PAIR,
|
||||
PPCREC_IML_OP_FPR_SUB_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_MULTIPLY_PAIR,
|
||||
PPCREC_IML_OP_FPR_DIVIDE_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_DIVIDE_PAIR,
|
||||
PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP,
|
||||
PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP,
|
||||
PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP, // leave bottom of destination untouched
|
||||
PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, // leave bottom of destination untouched
|
||||
PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM, // leave top of destination untouched
|
||||
PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED,
|
||||
PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, // expand bottom f32 to f64 in bottom and top half
|
||||
PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP, // calculate reciprocal with Espresso accuracy of source bottom half and write result to destination bottom and top half
|
||||
PPCREC_IML_OP_FPR_FCMPO_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_FCMPU_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_FCMPU_TOP,
|
||||
PPCREC_IML_OP_FPR_NEGATE_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_NEGATE_PAIR,
|
||||
PPCREC_IML_OP_FPR_ABS_BOTTOM, // abs(fp0)
|
||||
PPCREC_IML_OP_FPR_ABS_PAIR,
|
||||
PPCREC_IML_OP_FPR_FRES_PAIR, // 1.0/fp approx (Espresso accuracy)
|
||||
PPCREC_IML_OP_FPR_FRSQRTE_PAIR, // 1.0/sqrt(fp) approx (Espresso accuracy)
|
||||
PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM, // -abs(fp0)
|
||||
PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, // round 64bit double to 64bit double with 32bit float precision (in bottom half of xmm register)
|
||||
PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR, // round two 64bit doubles to 64bit double with 32bit float precision
|
||||
PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT,
|
||||
PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ,
|
||||
PPCREC_IML_OP_FPR_SELECT_BOTTOM, // selectively copy bottom value from operand B or C based on value in operand A
|
||||
PPCREC_IML_OP_FPR_SELECT_PAIR, // selectively copy top/bottom from operand B or C based on value in top/bottom of operand A
|
||||
// PS
|
||||
PPCREC_IML_OP_FPR_SUM0,
|
||||
PPCREC_IML_OP_FPR_SUM1,
|
||||
};
|
||||
|
||||
#define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN)
|
||||
|
||||
enum
|
||||
{
|
||||
PPCREC_IML_MACRO_BLR, // macro for BLR instruction code
|
||||
PPCREC_IML_MACRO_BLRL, // macro for BLRL instruction code
|
||||
PPCREC_IML_MACRO_BCTR, // macro for BCTR instruction code
|
||||
PPCREC_IML_MACRO_BCTRL, // macro for BCTRL instruction code
|
||||
PPCREC_IML_MACRO_BL, // call to different function (can be within same function)
|
||||
PPCREC_IML_MACRO_B_FAR, // branch to different function
|
||||
PPCREC_IML_MACRO_COUNT_CYCLES, // decrease current remaining thread cycles by a certain amount
|
||||
PPCREC_IML_MACRO_HLE, // HLE function call
|
||||
PPCREC_IML_MACRO_MFTB, // get TB register value (low or high)
|
||||
PPCREC_IML_MACRO_LEAVE, // leaves recompiler and switches to interpeter
|
||||
// debugging
|
||||
PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
PPCREC_JUMP_CONDITION_NONE,
|
||||
PPCREC_JUMP_CONDITION_E, // equal / zero
|
||||
PPCREC_JUMP_CONDITION_NE, // not equal / not zero
|
||||
PPCREC_JUMP_CONDITION_LE, // less or equal
|
||||
PPCREC_JUMP_CONDITION_L, // less
|
||||
PPCREC_JUMP_CONDITION_GE, // greater or equal
|
||||
PPCREC_JUMP_CONDITION_G, // greater
|
||||
// special case:
|
||||
PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW, // needs special handling
|
||||
PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW, // not summaryoverflow
|
||||
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
PPCREC_CR_MODE_COMPARE_SIGNED,
|
||||
PPCREC_CR_MODE_COMPARE_UNSIGNED, // alias logic compare
|
||||
// others: PPCREC_CR_MODE_ARITHMETIC,
|
||||
PPCREC_CR_MODE_ARITHMETIC, // arithmetic use (for use with add/sub instructions without generating extra code)
|
||||
PPCREC_CR_MODE_LOGICAL,
|
||||
};
|
||||
|
||||
enum
{
	PPCREC_IML_TYPE_NONE,
	PPCREC_IML_TYPE_NO_OP, // no-op instruction
	PPCREC_IML_TYPE_JUMPMARK, // possible jump destination (generated before each PPC instruction)
	PPCREC_IML_TYPE_R_R, // r* (op) r*
	PPCREC_IML_TYPE_R_R_R, // r* = r* (op) r*
	PPCREC_IML_TYPE_R_R_S32, // r* = r* (op) s32*
	PPCREC_IML_TYPE_LOAD, // r* = [r*+s32*]
	PPCREC_IML_TYPE_LOAD_INDEXED, // r* = [r*+r*]
	PPCREC_IML_TYPE_STORE, // [r*+s32*] = r*
	PPCREC_IML_TYPE_STORE_INDEXED, // [r*+r*] = r*
	PPCREC_IML_TYPE_R_NAME, // r* = name
	PPCREC_IML_TYPE_NAME_R, // name* = r*
	PPCREC_IML_TYPE_R_S32, // r* (op) imm
	PPCREC_IML_TYPE_MACRO,
	PPCREC_IML_TYPE_CJUMP, // conditional jump
	PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles >= 0
	PPCREC_IML_TYPE_PPC_ENTER, // used to mark locations that should be written to recompilerCallTable
	PPCREC_IML_TYPE_CR, // condition register specific operations (one or more operands)
	// conditional
	PPCREC_IML_TYPE_CONDITIONAL_R_S32,
	// FPR
	PPCREC_IML_TYPE_FPR_R_NAME, // f* = name
	PPCREC_IML_TYPE_FPR_NAME_R, // name* = f*
	PPCREC_IML_TYPE_FPR_LOAD, // r* = (bitdepth) [r*+s32*] (single or paired single mode)
	PPCREC_IML_TYPE_FPR_LOAD_INDEXED, // r* = (bitdepth) [r*+r*] (single or paired single mode)
	PPCREC_IML_TYPE_FPR_STORE, // (bitdepth) [r*+s32*] = r* (single or paired single mode)
	PPCREC_IML_TYPE_FPR_STORE_INDEXED, // (bitdepth) [r*+r*] = r* (single or paired single mode)
	PPCREC_IML_TYPE_FPR_R_R,
	PPCREC_IML_TYPE_FPR_R_R_R,
	PPCREC_IML_TYPE_FPR_R_R_R_R,
	PPCREC_IML_TYPE_FPR_R,
	// special
	PPCREC_IML_TYPE_MEM2MEM, // memory to memory copy (deprecated)
};

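// Illustrative examples (assumptions, not from the original source):
//   addi r3, r3, 0x20  -> likely a PPCREC_IML_TYPE_R_R_S32 instruction (r* = r* ADD s32)
//   lwz r4, 8(r3)      -> likely a PPCREC_IML_TYPE_LOAD instruction   (r* = [r* + s32])
//   stwx r5, r3, r4    -> likely a PPCREC_IML_TYPE_STORE_INDEXED      ([r* + r*] = r*)
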
enum
{
	PPCREC_NAME_NONE,
	PPCREC_NAME_TEMPORARY,
	PPCREC_NAME_R0 = 1000,
	PPCREC_NAME_SPR0 = 2000,
	PPCREC_NAME_FPR0 = 3000,
	PPCREC_NAME_TEMPORARY_FPR0 = 4000, // 0 to 7
	//PPCREC_NAME_CR0 = 3000, // value mapped condition register (usually it isn't needed and can be optimized away)
};

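// Illustrative note (an assumption based on the base values above): names appear to be
// formed additively from these bases, e.g. PPC GPR r5 -> PPCREC_NAME_R0 + 5 = 1005 and
// FPR f2 -> PPCREC_NAME_FPR0 + 2 = 3002; PPCRecompilerImlGen_loadRegister() then maps
// such a name to a virtual register.
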
// special cases for LOAD/STORE
#define PPC_REC_LOAD_LWARX_MARKER	(100) // lwarx instruction (similar to LWZX but sets reserved address/value)
#define PPC_REC_STORE_STWCX_MARKER	(100) // stwcx instruction (similar to STWX but writes only if reservation from LWARX is valid)
#define PPC_REC_STORE_STSWI_1		(200) // stswi nb = 1
#define PPC_REC_STORE_STSWI_2		(201) // stswi nb = 2
#define PPC_REC_STORE_STSWI_3		(202) // stswi nb = 3
#define PPC_REC_STORE_LSWI_1		(200) // lswi nb = 1
#define PPC_REC_STORE_LSWI_2		(201) // lswi nb = 2
#define PPC_REC_STORE_LSWI_3		(202) // lswi nb = 3

#define PPC_REC_INVALID_REGISTER	0xFF

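// Note: these marker values are passed in the copyWidth field of load/store IML
// instructions in place of a real access width; for example, the CR tracking code in
// this commit detects stwcx. via
//   imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER
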
#define PPCREC_CR_BIT_LT	0
#define PPCREC_CR_BIT_GT	1
#define PPCREC_CR_BIT_EQ	2
#define PPCREC_CR_BIT_SO	3

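// Worked example: CR state is tracked as a flat 32-bit mask where bit (crIndex * 4 + crBit)
// represents one CR bit, matching expressions used by the analyzer such as
//   uint32 crBitFlag = 1 << (crRegisterIndex * 4 + crBitIndex);
// So cr0.eq is bit (0 * 4 + PPCREC_CR_BIT_EQ) = 2 (mask 0x4), and the whole cr0 field is mask 0xF.
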
enum
{
	// fpr load
	PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0,
	PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1,
	PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0,
	PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0,
	PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1,
	PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0,
	PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1,
	PPCREC_FPR_LD_MODE_PSQ_S16_PS0,
	PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1,
	PPCREC_FPR_LD_MODE_PSQ_U16_PS0,
	PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1,
	PPCREC_FPR_LD_MODE_PSQ_S8_PS0,
	PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1,
	PPCREC_FPR_LD_MODE_PSQ_U8_PS0,
	PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1,
	// fpr store
	PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, // store 1 single precision float from ps0
	PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, // store 1 double precision float from ps0

	PPCREC_FPR_ST_MODE_UI32_FROM_PS0, // store the raw low 32 bits of PS0

	PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1,
	PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0,
	PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1,
	PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0,
	PPCREC_FPR_ST_MODE_PSQ_S8_PS0,
	PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1,
	PPCREC_FPR_ST_MODE_PSQ_U8_PS0,
	PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1,
	PPCREC_FPR_ST_MODE_PSQ_U16_PS0,
	PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1,
	PPCREC_FPR_ST_MODE_PSQ_S16_PS0,
	PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1,
};

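// Illustrative note (an assumption, not confirmed by the original source): the _GENERIC
// modes presumably handle the case where the controlling GQR value is unknown at
// recompile time, while the typed variants (FLOAT/S16/U16/S8/U8) encode a constant
// quantization type. The _PS0_PS1 forms access both halves of a paired single, the _PS0
// forms only the bottom half; PPCRecompiler_optimizePSQLoadAndStore() likely narrows
// generic modes to typed ones.
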
bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set<uint32>& entryAddresses);
void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext); // todo - move to destructor

PPCRecImlInstruction_t* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_pushBackIMLInstructions(PPCRecImlSegment_t* imlSegment, sint32 index, sint32 shiftBackCount);
PPCRecImlInstruction_t* PPCRecompiler_insertInstruction(PPCRecImlSegment_t* imlSegment, sint32 index);

void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint32 index, sint32 count);

void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint, PPCRecImlSegment_t* imlSegment, sint32 index);
void PPCRecompilerIml_removeSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint);

// GPR register management
uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false);
uint32 PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);

// FPR register management
uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false);
uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);

// IML instruction generation
void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 jumpmarkAddress);
void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction);

void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 copyWidth, bool signExtend, bool bigEndian, uint8 crRegister, uint32 crMode);
void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet);
void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0);

// IML instruction generation (new style, can generate new instructions but also overwrite existing ones)

void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction);
void PPCRecompilerImlGen_generateNewInstruction_memory_memory(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint8 srcMemReg, sint32 srcImmS32, uint8 dstMemReg, sint32 dstImmS32, uint8 copyWidth);

void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister = PPC_REC_INVALID_REGISTER);

// IML generation - FPU
bool PPCRecompilerImlGen_LFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFIWX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FMUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FDIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FNMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FMULS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FDIVS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FMADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FNMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FCMPO(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FCMPU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FMR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FNABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FRES(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FRSP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FNEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FSEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FCTIWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_SUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_DIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_ABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_RES(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_RSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_SEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MERGE00(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MERGE01(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MERGE10(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MERGE11(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);

// IML general
bool PPCRecompiler_isSuffixInstruction(PPCRecImlInstruction_t* iml);
void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompilerIml_setLinkBranchNotTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst);
void PPCRecompilerIml_setLinkBranchTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst);
void PPCRecompilerIML_relinkInputSegment(PPCRecImlSegment_t* imlSegmentOrig, PPCRecImlSegment_t* imlSegmentNew);
void PPCRecompilerIML_removeLink(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst);
void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext);

PPCRecImlInstruction_t* PPCRecompilerIML_getLastInstruction(PPCRecImlSegment_t* imlSegment);

// IML analyzer
typedef struct
{
	uint32 readCRBits;
	uint32 writtenCRBits;
} PPCRecCRTracking_t;

bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment);
bool PPCRecompilerImlAnalyzer_canTypeWriteCR(PPCRecImlInstruction_t* imlInstruction);
void PPCRecompilerImlAnalyzer_getCRTracking(PPCRecImlInstruction_t* imlInstruction, PPCRecCRTracking_t* crTracking);

// IML optimizer
bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext);

bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext);

void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext);

void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext);

// IML register allocator
void PPCRecompilerImm_allocateRegisters(ppcImlGenContext_t* ppcImlGenContext);

// late optimizations
void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext);

// debug
void PPCRecompiler_dumpIMLSegment(PPCRecImlSegment_t* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false);

typedef struct
{
	union
	{
		struct
		{
			sint16 readNamedReg1;
			sint16 readNamedReg2;
			sint16 readNamedReg3;
			sint16 writtenNamedReg1;
		};
		sint16 gpr[4]; // 3 read + 1 write
	};
	// FPR
	union
	{
		struct
		{
			// note: If the destination operand is not fully written, it will be added as a read FPR as well
			sint16 readFPR1;
			sint16 readFPR2;
			sint16 readFPR3;
			sint16 readFPR4; // usually set to the result FPR if it is only partially overwritten
			sint16 writtenFPR1;
		};
		sint16 fpr[4]; // note: aliases only the four read fields (readFPR1..readFPR4), not writtenFPR1
	};
} PPCImlOptimizerUsedRegisters_t;

void PPCRecompiler_checkRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, PPCImlOptimizerUsedRegisters_t* registersUsed);
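// Usage sketch (illustrative; it mirrors how the liveness code later in this commit
// consumes the struct):
//   PPCImlOptimizerUsedRegisters_t used;
//   PPCRecompiler_checkRegisterUsage(NULL, imlInstruction, &used);
//   for (sint32 t = 0; t < 4; t++)
//   {
//       sint32 virtualRegister = used.gpr[t]; // t = 0..2 are reads, t = 3 is the write
//       if (virtualRegister < 0)
//           continue; // slot unused
//       // ... track liveness of virtualRegister here ...
//   }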
137
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp
Normal file

@ -0,0 +1,137 @@
#include "PPCRecompiler.h"
|
||||
#include "PPCRecompilerIml.h"
|
||||
#include "util/helpers/fixedSizeList.h"
|
||||
#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
|
||||
|
||||
/*
|
||||
* Initializes a single segment and returns true if it is a finite loop
|
||||
*/
|
||||
bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment)
{
	bool isTightFiniteLoop = false;
	// base criteria, must jump to beginning of same segment
	if (imlSegment->nextSegmentBranchTaken != imlSegment)
		return false;
	// loops using BDNZ are assumed to always be finite
	for (sint32 t = 0; t < imlSegment->imlListCount; t++)
	{
		if (imlSegment->imlList[t].type == PPCREC_IML_TYPE_R_S32 && imlSegment->imlList[t].operation == PPCREC_IML_OP_SUB && imlSegment->imlList[t].crRegister == 8)
		{
			return true;
		}
	}
	// for non-BDNZ loops, check for common patterns
	// risky approach, look for ADD/SUB operations and assume that potential overflow means finite (does not include r_r_s32 ADD/SUB)
	// this catches most loops with load-update and store-update instructions, but also those with decrementing counters
	FixedSizeList<sint32, 64, true> list_modifiedRegisters;
	for (sint32 t = 0; t < imlSegment->imlListCount; t++)
	{
		if (imlSegment->imlList[t].type == PPCREC_IML_TYPE_R_S32 && (imlSegment->imlList[t].operation == PPCREC_IML_OP_ADD || imlSegment->imlList[t].operation == PPCREC_IML_OP_SUB))
		{
			list_modifiedRegisters.addUnique(imlSegment->imlList[t].op_r_immS32.registerIndex);
		}
	}
	if (list_modifiedRegisters.count > 0)
	{
		// remove all registers from the list that are modified by non-ADD/SUB instructions
		// todo: We should also cover the case where ADD+SUB on the same register cancel the effect out
		PPCImlOptimizerUsedRegisters_t registersUsed;
		for (sint32 t = 0; t < imlSegment->imlListCount; t++)
		{
			if (imlSegment->imlList[t].type == PPCREC_IML_TYPE_R_S32 && (imlSegment->imlList[t].operation == PPCREC_IML_OP_ADD || imlSegment->imlList[t].operation == PPCREC_IML_OP_SUB))
				continue;
			PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + t, &registersUsed);
			if (registersUsed.writtenNamedReg1 < 0)
				continue;
			list_modifiedRegisters.remove(registersUsed.writtenNamedReg1);
		}
		if (list_modifiedRegisters.count > 0)
		{
			return true;
		}
	}
	return false;
}

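// Illustrative example (not from the original source): a copy loop such as
//   loop: lbzu r5, 1(r3) / stbu r5, 1(r4) / bdnz loop
// is accepted by the BDNZ check above, while a compare-terminated loop whose address
// registers are only ever advanced by load/store-update instructions is accepted by the
// ADD/SUB heuristic.
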
/*
 * Returns true if the imlInstruction can overwrite CR (depending on value of ->crRegister)
 */
bool PPCRecompilerImlAnalyzer_canTypeWriteCR(PPCRecImlInstruction_t* imlInstruction)
{
	if (imlInstruction->type == PPCREC_IML_TYPE_R_R)
		return true;
	if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R)
		return true;
	if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32)
		return true;
	if (imlInstruction->type == PPCREC_IML_TYPE_R_S32)
		return true;
	if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R)
		return true;
	if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R)
		return true;
	if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R)
		return true;
	if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R)
		return true;
	return false;
}

void PPCRecompilerImlAnalyzer_getCRTracking(PPCRecImlInstruction_t* imlInstruction, PPCRecCRTracking_t* crTracking)
{
	crTracking->readCRBits = 0;
	crTracking->writtenCRBits = 0;
	if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP)
	{
		if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE)
		{
			uint32 crBitFlag = 1 << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + imlInstruction->op_conditionalJump.crBitIndex);
			crTracking->readCRBits = (crBitFlag);
		}
	}
	else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
	{
		uint32 crBitFlag = 1 << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex);
		crTracking->readCRBits = crBitFlag;
	}
	else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR)
	{
		crTracking->readCRBits = 0xFFFFFFFF;
	}
	else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF)
	{
		crTracking->writtenCRBits |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32);
	}
	else if (imlInstruction->type == PPCREC_IML_TYPE_CR)
	{
		if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR ||
			imlInstruction->operation == PPCREC_IML_OP_CR_SET)
		{
			uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD);
			crTracking->writtenCRBits = crBitFlag;
		}
		else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR ||
			imlInstruction->operation == PPCREC_IML_OP_CR_ORC ||
			imlInstruction->operation == PPCREC_IML_OP_CR_AND ||
			imlInstruction->operation == PPCREC_IML_OP_CR_ANDC)
		{
			uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD);
			crTracking->writtenCRBits = crBitFlag;
			crBitFlag = 1 << (imlInstruction->op_cr.crA);
			crTracking->readCRBits = crBitFlag;
			crBitFlag = 1 << (imlInstruction->op_cr.crB);
			crTracking->readCRBits |= crBitFlag;
		}
		else
			assert_dbg();
	}
	else if (PPCRecompilerImlAnalyzer_canTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7)
	{
		crTracking->writtenCRBits |= (0xF << (imlInstruction->crRegister * 4));
	}
	else if ((imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER)
	{
		// overwrites CR0
		crTracking->writtenCRBits |= (0xF << 0);
	}
}
5026
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp
Normal file
File diff suppressed because it is too large

1926
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp
Normal file
File diff suppressed because it is too large

2175
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp
Normal file
File diff suppressed because it is too large

399
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp
Normal file

@ -0,0 +1,399 @@
#include "PPCRecompiler.h"
|
||||
#include "PPCRecompilerIml.h"
|
||||
#include "PPCRecompilerX64.h"
|
||||
#include "PPCRecompilerImlRanges.h"
|
||||
#include "util/helpers/MemoryPool.h"
|
||||
|
||||
void PPCRecRARange_addLink_perVirtualGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange)
|
||||
{
|
||||
#ifndef PUBLIC_RELEASE
|
||||
if ((*root) && (*root)->range->virtualRegister != subrange->range->virtualRegister)
|
||||
assert_dbg();
|
||||
#endif
|
||||
subrange->link_sameVirtualRegisterGPR.next = *root;
|
||||
if (*root)
|
||||
(*root)->link_sameVirtualRegisterGPR.prev = subrange;
|
||||
subrange->link_sameVirtualRegisterGPR.prev = nullptr;
|
||||
*root = subrange;
|
||||
}
|
||||
|
||||
void PPCRecRARange_addLink_allSubrangesGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange)
|
||||
{
|
||||
subrange->link_segmentSubrangesGPR.next = *root;
|
||||
if (*root)
|
||||
(*root)->link_segmentSubrangesGPR.prev = subrange;
|
||||
subrange->link_segmentSubrangesGPR.prev = nullptr;
|
||||
*root = subrange;
|
||||
}
|
||||
|
||||
void PPCRecRARange_removeLink_perVirtualGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange)
|
||||
{
|
||||
raLivenessSubrange_t* tempPrev = subrange->link_sameVirtualRegisterGPR.prev;
|
||||
if (subrange->link_sameVirtualRegisterGPR.prev)
|
||||
subrange->link_sameVirtualRegisterGPR.prev->link_sameVirtualRegisterGPR.next = subrange->link_sameVirtualRegisterGPR.next;
|
||||
else
|
||||
(*root) = subrange->link_sameVirtualRegisterGPR.next;
|
||||
if (subrange->link_sameVirtualRegisterGPR.next)
|
||||
subrange->link_sameVirtualRegisterGPR.next->link_sameVirtualRegisterGPR.prev = tempPrev;
|
||||
#ifndef PUBLIC_RELEASE
|
||||
subrange->link_sameVirtualRegisterGPR.prev = (raLivenessSubrange_t*)1;
|
||||
subrange->link_sameVirtualRegisterGPR.next = (raLivenessSubrange_t*)1;
|
||||
#endif
|
||||
}
|
||||
|
||||
void PPCRecRARange_removeLink_allSubrangesGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange)
|
||||
{
|
||||
raLivenessSubrange_t* tempPrev = subrange->link_segmentSubrangesGPR.prev;
|
||||
if (subrange->link_segmentSubrangesGPR.prev)
|
||||
subrange->link_segmentSubrangesGPR.prev->link_segmentSubrangesGPR.next = subrange->link_segmentSubrangesGPR.next;
|
||||
else
|
||||
(*root) = subrange->link_segmentSubrangesGPR.next;
|
||||
if (subrange->link_segmentSubrangesGPR.next)
|
||||
subrange->link_segmentSubrangesGPR.next->link_segmentSubrangesGPR.prev = tempPrev;
|
||||
#ifndef PUBLIC_RELEASE
|
||||
subrange->link_segmentSubrangesGPR.prev = (raLivenessSubrange_t*)1;
|
||||
subrange->link_segmentSubrangesGPR.next = (raLivenessSubrange_t*)1;
|
||||
#endif
|
||||
}
|
||||
|
||||
MemoryPoolPermanentObjects<raLivenessRange_t> memPool_livenessRange(4096);
|
||||
MemoryPoolPermanentObjects<raLivenessSubrange_t> memPool_livenessSubrange(4096);
|
||||
|
||||
raLivenessRange_t* PPCRecRA_createRangeBase(ppcImlGenContext_t* ppcImlGenContext, uint32 virtualRegister, uint32 name)
|
||||
{
|
||||
raLivenessRange_t* livenessRange = memPool_livenessRange.acquireObj();
|
||||
livenessRange->list_subranges.resize(0);
|
||||
livenessRange->virtualRegister = virtualRegister;
|
||||
livenessRange->name = name;
|
||||
livenessRange->physicalRegister = -1;
|
||||
ppcImlGenContext->raInfo.list_ranges.push_back(livenessRange);
|
||||
return livenessRange;
|
||||
}
|
||||
|
||||
raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, PPCRecImlSegment_t* imlSegment, sint32 startIndex, sint32 endIndex)
|
||||
{
|
||||
raLivenessSubrange_t* livenessSubrange = memPool_livenessSubrange.acquireObj();
|
||||
livenessSubrange->list_locations.resize(0);
|
||||
livenessSubrange->range = range;
|
||||
livenessSubrange->imlSegment = imlSegment;
|
||||
PPCRecompilerIml_setSegmentPoint(&livenessSubrange->start, imlSegment, startIndex);
|
||||
PPCRecompilerIml_setSegmentPoint(&livenessSubrange->end, imlSegment, endIndex);
|
||||
// default values
|
||||
livenessSubrange->hasStore = false;
|
||||
livenessSubrange->hasStoreDelayed = false;
|
||||
livenessSubrange->lastIterationIndex = 0;
|
||||
livenessSubrange->subrangeBranchNotTaken = nullptr;
|
||||
livenessSubrange->subrangeBranchTaken = nullptr;
|
||||
livenessSubrange->_noLoad = false;
|
||||
// add to range
|
||||
range->list_subranges.push_back(livenessSubrange);
|
||||
// add to segment
|
||||
PPCRecRARange_addLink_perVirtualGPR(&(imlSegment->raInfo.linkedList_perVirtualGPR[range->virtualRegister]), livenessSubrange);
|
||||
PPCRecRARange_addLink_allSubrangesGPR(&imlSegment->raInfo.linkedList_allSubranges, livenessSubrange);
|
||||
return livenessSubrange;
|
||||
}
|
||||
|
||||
void _unlinkSubrange(raLivenessSubrange_t* subrange)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = subrange->imlSegment;
|
||||
PPCRecRARange_removeLink_perVirtualGPR(&imlSegment->raInfo.linkedList_perVirtualGPR[subrange->range->virtualRegister], subrange);
|
||||
PPCRecRARange_removeLink_allSubrangesGPR(&imlSegment->raInfo.linkedList_allSubranges, subrange);
|
||||
}
|
||||
|
||||
void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange)
|
||||
{
|
||||
_unlinkSubrange(subrange);
|
||||
subrange->range->list_subranges.erase(std::find(subrange->range->list_subranges.begin(), subrange->range->list_subranges.end(), subrange));
|
||||
subrange->list_locations.clear();
|
||||
PPCRecompilerIml_removeSegmentPoint(&subrange->start);
|
||||
PPCRecompilerIml_removeSegmentPoint(&subrange->end);
|
||||
memPool_livenessSubrange.releaseObj(subrange);
|
||||
}
|
||||
|
||||
void _PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange)
|
||||
{
|
||||
_unlinkSubrange(subrange);
|
||||
PPCRecompilerIml_removeSegmentPoint(&subrange->start);
|
||||
PPCRecompilerIml_removeSegmentPoint(&subrange->end);
|
||||
memPool_livenessSubrange.releaseObj(subrange);
|
||||
}
|
||||
|
||||
void PPCRecRA_deleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range)
|
||||
{
|
||||
for (auto& subrange : range->list_subranges)
|
||||
{
|
||||
_PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext, subrange);
|
||||
}
|
||||
ppcImlGenContext->raInfo.list_ranges.erase(std::find(ppcImlGenContext->raInfo.list_ranges.begin(), ppcImlGenContext->raInfo.list_ranges.end(), range));
|
||||
memPool_livenessRange.releaseObj(range);
|
||||
}
|
||||
|
||||
void PPCRecRA_deleteRangeNoUnlink(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range)
|
||||
{
|
||||
for (auto& subrange : range->list_subranges)
|
||||
{
|
||||
_PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext, subrange);
|
||||
}
|
||||
memPool_livenessRange.releaseObj(range);
|
||||
}
|
||||
|
||||
void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
for(auto& range : ppcImlGenContext->raInfo.list_ranges)
|
||||
{
|
||||
PPCRecRA_deleteRangeNoUnlink(ppcImlGenContext, range);
|
||||
}
|
||||
ppcImlGenContext->raInfo.list_ranges.clear();
|
||||
}
|
||||
|
||||
void PPCRecRA_mergeRanges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, raLivenessRange_t* absorbedRange)
|
||||
{
|
||||
cemu_assert_debug(range != absorbedRange);
|
||||
cemu_assert_debug(range->virtualRegister == absorbedRange->virtualRegister);
|
||||
// move all subranges from absorbedRange to range
|
||||
for (auto& subrange : absorbedRange->list_subranges)
|
||||
{
|
||||
range->list_subranges.push_back(subrange);
|
||||
subrange->range = range;
|
||||
}
|
||||
absorbedRange->list_subranges.clear();
|
||||
PPCRecRA_deleteRange(ppcImlGenContext, absorbedRange);
|
||||
}
|
||||
|
||||
void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, raLivenessSubrange_t* absorbedSubrange)
|
||||
{
|
||||
#ifndef PUBLIC_RELEASE
|
||||
PPCRecRA_debugValidateSubrange(subrange);
|
||||
PPCRecRA_debugValidateSubrange(absorbedSubrange);
|
||||
if (subrange->imlSegment != absorbedSubrange->imlSegment)
|
||||
assert_dbg();
|
||||
if (subrange->end.index > absorbedSubrange->start.index)
|
||||
assert_dbg();
|
||||
if (subrange->subrangeBranchTaken || subrange->subrangeBranchNotTaken)
|
||||
assert_dbg();
|
||||
if (subrange == absorbedSubrange)
|
||||
assert_dbg();
|
||||
#endif
|
||||
subrange->subrangeBranchTaken = absorbedSubrange->subrangeBranchTaken;
|
||||
subrange->subrangeBranchNotTaken = absorbedSubrange->subrangeBranchNotTaken;
|
||||
|
||||
// merge usage locations
|
||||
for (auto& location : absorbedSubrange->list_locations)
|
||||
{
|
||||
subrange->list_locations.push_back(location);
|
||||
}
|
||||
absorbedSubrange->list_locations.clear();
|
||||
|
||||
subrange->end.index = absorbedSubrange->end.index;
|
||||
|
||||
PPCRecRA_debugValidateSubrange(subrange);
|
||||
|
||||
PPCRecRA_deleteSubrange(ppcImlGenContext, absorbedSubrange);
|
||||
}
|
||||
|
||||
// remove all inter-segment connections from the range and split it into local ranges (also removes empty ranges)
|
||||
void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range)
|
||||
{
|
||||
if (range->list_subranges.size() == 1)
|
||||
assert_dbg();
|
||||
for (auto& subrange : range->list_subranges)
|
||||
{
|
||||
if (subrange->list_locations.empty())
|
||||
continue;
|
||||
raLivenessRange_t* newRange = PPCRecRA_createRangeBase(ppcImlGenContext, range->virtualRegister, range->name);
|
||||
raLivenessSubrange_t* newSubrange = PPCRecRA_createSubrange(ppcImlGenContext, newRange, subrange->imlSegment, subrange->list_locations.data()[0].index, subrange->list_locations.data()[subrange->list_locations.size() - 1].index + 1);
|
||||
// copy locations
|
||||
for (auto& location : subrange->list_locations)
|
||||
{
|
||||
newSubrange->list_locations.push_back(location);
|
||||
}
|
||||
}
|
||||
// remove original range
|
||||
PPCRecRA_deleteRange(ppcImlGenContext, range);
|
||||
}
|
||||
|
||||
#ifndef PUBLIC_RELEASE
|
||||
void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange)
|
||||
{
|
||||
// validate subrange
|
||||
if (subrange->subrangeBranchTaken && subrange->subrangeBranchTaken->imlSegment != subrange->imlSegment->nextSegmentBranchTaken)
|
||||
assert_dbg();
|
||||
if (subrange->subrangeBranchNotTaken && subrange->subrangeBranchNotTaken->imlSegment != subrange->imlSegment->nextSegmentBranchNotTaken)
|
||||
assert_dbg();
|
||||
}
|
||||
#else
|
||||
void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange) {}
|
||||
#endif
|
||||
|
||||
// split subrange at the given index
|
||||
// After the split there will be two ranges/subranges:
|
||||
// head -> subrange is shortned to end at splitIndex
|
||||
// tail -> a new subrange that reaches from splitIndex to the end of the original subrange
|
||||
// if head has a physical register assigned it will not carry over to tail
|
||||
// The return value is the tail subrange
|
||||
// If trimToHole is true, the end of the head subrange and the start of the tail subrange will be moved to fit the locations
|
||||
// Ranges that begin at RA_INTER_RANGE_START are allowed and can be split
|
||||
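// Illustrative example (not from the original source): a subrange covering [2, 10) with
// locations at 3, 5 and 8, split at splitIndex = 6 with trimToHole = true, becomes
//   head: locations 3, 5 -> end trimmed to 5 + 1 = 6
//   tail: location 8     -> created at splitIndex, then trimmed to start at 8, ending at 10
// which leaves the hole between the two uses unallocated.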
raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, sint32 splitIndex, bool trimToHole)
{
	// validation
#ifndef PUBLIC_RELEASE
	if (subrange->end.index == RA_INTER_RANGE_END || subrange->end.index == RA_INTER_RANGE_START)
		assert_dbg();
	if (subrange->start.index >= splitIndex)
		assert_dbg();
	if (subrange->end.index <= splitIndex)
		assert_dbg();
#endif
	// create tail
	raLivenessRange_t* tailRange = PPCRecRA_createRangeBase(ppcImlGenContext, subrange->range->virtualRegister, subrange->range->name);
	raLivenessSubrange_t* tailSubrange = PPCRecRA_createSubrange(ppcImlGenContext, tailRange, subrange->imlSegment, splitIndex, subrange->end.index);
	// copy locations
	for (auto& location : subrange->list_locations)
	{
		if (location.index >= splitIndex)
			tailSubrange->list_locations.push_back(location);
	}
	// remove tail locations from head
	for (sint32 i = 0; i < subrange->list_locations.size(); i++)
	{
		raLivenessLocation_t* location = subrange->list_locations.data() + i;
		if (location->index >= splitIndex)
		{
			subrange->list_locations.resize(i);
			break;
		}
	}
	// adjust start/end
	if (trimToHole)
	{
		if (subrange->list_locations.empty())
		{
			subrange->end.index = subrange->start.index + 1;
		}
		else
		{
			subrange->end.index = subrange->list_locations.back().index + 1;
		}
		if (tailSubrange->list_locations.empty())
		{
			assert_dbg(); // should not happen? (in this case we could just avoid generating a tail at all)
		}
		else
		{
			tailSubrange->start.index = tailSubrange->list_locations.front().index;
		}
	}
	return tailSubrange;
}

void PPCRecRA_updateOrAddSubrangeLocation(raLivenessSubrange_t* subrange, sint32 index, bool isRead, bool isWrite)
{
	if (subrange->list_locations.empty())
	{
		subrange->list_locations.emplace_back(index, isRead, isWrite);
		return;
	}
	raLivenessLocation_t* lastLocation = subrange->list_locations.data() + (subrange->list_locations.size() - 1);
	cemu_assert_debug(lastLocation->index <= index);
	if (lastLocation->index == index)
	{
		// update existing entry
		lastLocation->isRead = lastLocation->isRead || isRead;
		lastLocation->isWrite = lastLocation->isWrite || isWrite;
		return;
	}
	// add new entry
	subrange->list_locations.emplace_back(index, isRead, isWrite);
}

sint32 PPCRecRARange_getReadWriteCost(PPCRecImlSegment_t* imlSegment)
{
	sint32 v = imlSegment->loopDepth + 1;
	v *= 5;
	return v * v; // 25, 100, 225, 400
}

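// Worked example: the read/write cost grows quadratically with loop depth,
//   loopDepth 0 -> ((0 + 1) * 5)^2 = 25
//   loopDepth 1 -> ((1 + 1) * 5)^2 = 100
//   loopDepth 2 -> 225, loopDepth 3 -> 400
// so spills inside nested loops are penalized heavily.
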
// calculate cost of entire range
// ignores data flow and does not detect avoidable reads/stores
sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range)
{
	sint32 cost = 0;

	// todo - this algorithm isn't accurate. If we have 10 parallel branches with a load each then the actual cost is still only that of one branch (plus minimal extra cost for generating more code).

	// currently we calculate the cost based on the most expensive entry/exit point

	sint32 mostExpensiveRead = 0;
	sint32 mostExpensiveWrite = 0;
	sint32 readCount = 0;
	sint32 writeCount = 0;

	for (auto& subrange : range->list_subranges)
	{
		if (subrange->start.index != RA_INTER_RANGE_START)
		{
			//cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment);
			mostExpensiveRead = std::max(mostExpensiveRead, PPCRecRARange_getReadWriteCost(subrange->imlSegment));
			readCount++;
		}
		if (subrange->end.index != RA_INTER_RANGE_END)
		{
			//cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment);
			mostExpensiveWrite = std::max(mostExpensiveWrite, PPCRecRARange_getReadWriteCost(subrange->imlSegment));
			writeCount++;
		}
	}
	cost = mostExpensiveRead + mostExpensiveWrite;
	cost = cost + (readCount + writeCount) / 10;
	return cost;
}

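// Worked example (illustrative): a range with one entry point in a depth-0 segment
// (read cost 25) and one exit point in a depth-1 segment (write cost 100) costs
// 25 + 100 + (1 + 1) / 10 = 125 (the read/write count term rounds down to 0 here).
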
// estimate the additional cost (relative to the current cost) that the range would incur after calling PPCRecRA_explodeRange() on it
sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t* range)
{
	sint32 cost = -PPCRecRARange_estimateCost(range);
	for (auto& subrange : range->list_subranges)
	{
		if (subrange->list_locations.empty())
			continue;
		cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment) * 2; // we assume a read and a store
	}
	return cost;
}

sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessSubrange_t* subrange, sint32 splitIndex)
{
	// validation
#ifndef PUBLIC_RELEASE
	if (subrange->end.index == RA_INTER_RANGE_END)
		assert_dbg();
#endif

	sint32 cost = 0;
	// find split position in location list
	if (subrange->list_locations.empty())
	{
		assert_dbg(); // should not happen?
		return 0;
	}
	if (splitIndex <= subrange->list_locations.front().index)
		return 0;
	if (splitIndex > subrange->list_locations.back().index)
		return 0;

	// todo - determine exact cost of split subranges

	cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment) * 2; // currently we assume that the additional region will require a read and a store

	//for (sint32 f = 0; f < subrange->list_locations.size(); f++)
	//{
	//	raLivenessLocation_t* location = subrange->list_locations.data() + f;
	//	if (location->index >= splitIndex)
	//	{
	//		...
	//		return cost;
	//	}
	//}

	return cost;
}
27
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.h
Normal file

@ -0,0 +1,27 @@
#pragma once

raLivenessRange_t* PPCRecRA_createRangeBase(ppcImlGenContext_t* ppcImlGenContext, uint32 virtualRegister, uint32 name);
raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, PPCRecImlSegment_t* imlSegment, sint32 startIndex, sint32 endIndex);
void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange);
void PPCRecRA_deleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range);
void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext);

void PPCRecRA_mergeRanges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, raLivenessRange_t* absorbedRange);
void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range);

void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, raLivenessSubrange_t* absorbedSubrange);

raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, sint32 splitIndex, bool trimToHole = false);

void PPCRecRA_updateOrAddSubrangeLocation(raLivenessSubrange_t* subrange, sint32 index, bool isRead, bool isWrite);
void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange);

// cost estimation
sint32 PPCRecRARange_getReadWriteCost(PPCRecImlSegment_t* imlSegment);
sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range);
sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t* range);
sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessSubrange_t* subrange, sint32 splitIndex);

// special values to mark the index of ranges that reach across the segment border
#define RA_INTER_RANGE_START	(-1)
#define RA_INTER_RANGE_END		(0x70000000)
File diff suppressed because it is too large

@ -0,0 +1,414 @@
#include "PPCRecompiler.h"
|
||||
#include "PPCRecompilerIml.h"
|
||||
#include "PPCRecompilerX64.h"
|
||||
#include "PPCRecompilerImlRanges.h"
|
||||
#include <queue>
|
||||
|
||||
bool _isRangeDefined(PPCRecImlSegment_t* imlSegment, sint32 vGPR)
|
||||
{
|
||||
return (imlSegment->raDistances.reg[vGPR].usageStart != INT_MAX);
|
||||
}
|
||||
|
||||
void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
|
||||
{
|
||||
for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++)
|
||||
{
|
||||
imlSegment->raDistances.reg[i].usageStart = INT_MAX;
|
||||
imlSegment->raDistances.reg[i].usageEnd = INT_MIN;
|
||||
}
|
||||
// scan instructions for usage range
|
||||
sint32 index = 0;
|
||||
PPCImlOptimizerUsedRegisters_t gprTracking;
|
||||
while (index < imlSegment->imlListCount)
|
||||
{
|
||||
// end loop at suffix instruction
|
||||
if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index))
|
||||
break;
|
||||
// get accessed GPRs
|
||||
PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking);
|
||||
for (sint32 t = 0; t < 4; t++)
|
||||
{
|
||||
sint32 virtualRegister = gprTracking.gpr[t];
|
||||
if (virtualRegister < 0)
|
||||
continue;
|
||||
cemu_assert_debug(virtualRegister < PPC_REC_MAX_VIRTUAL_GPR);
|
||||
imlSegment->raDistances.reg[virtualRegister].usageStart = std::min(imlSegment->raDistances.reg[virtualRegister].usageStart, index); // index before/at instruction
|
||||
imlSegment->raDistances.reg[virtualRegister].usageEnd = std::max(imlSegment->raDistances.reg[virtualRegister].usageEnd, index+1); // index after instruction
|
||||
}
|
||||
// next instruction
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
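// Worked example (illustrative): if virtual register v is first accessed by instruction 3
// and last accessed by instruction 7 of a segment, the scan above records usageStart = 3
// and usageEnd = 8, i.e. the half-open interval [3, 8).
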
void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext)
{
	// for each register calculate min/max index of usage range within each segment
	for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
	{
		PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext, ppcImlGenContext->segmentList[s]);
	}
}

raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR, raLivenessRange_t* range)
{
	if (imlSegment->raDistances.isProcessed[vGPR])
	{
		// return the already existing subrange
		return imlSegment->raInfo.linkedList_perVirtualGPR[vGPR];
	}
	imlSegment->raDistances.isProcessed[vGPR] = true;
	if (_isRangeDefined(imlSegment, vGPR) == false)
		return nullptr;
	// create subrange
	cemu_assert_debug(imlSegment->raInfo.linkedList_perVirtualGPR[vGPR] == nullptr);
	raLivenessSubrange_t* subrange = PPCRecRA_createSubrange(ppcImlGenContext, range, imlSegment, imlSegment->raDistances.reg[vGPR].usageStart, imlSegment->raDistances.reg[vGPR].usageEnd);
	// traverse forward
	if (imlSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END)
	{
		if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START)
		{
			subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, vGPR, range);
			cemu_assert_debug(subrange->subrangeBranchTaken->start.index == RA_INTER_RANGE_START);
		}
		if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START)
		{
			subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, vGPR, range);
			cemu_assert_debug(subrange->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START);
		}
	}
	// traverse backward
	if (imlSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START)
	{
		for (auto& it : imlSegment->list_prevSegments)
		{
			if (it->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END)
				PPCRecRA_convertToMappedRanges(ppcImlGenContext, it, vGPR, range);
		}
	}
	// return subrange
	return subrange;
}

void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
	for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++)
	{
		if (_isRangeDefined(imlSegment, i) == false)
			continue;
		if (imlSegment->raDistances.isProcessed[i])
			continue;
		raLivenessRange_t* range = PPCRecRA_createRangeBase(ppcImlGenContext, i, ppcImlGenContext->mappedRegister[i]);
		PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment, i, range);
	}
	// create lookup table of ranges
	raLivenessSubrange_t* vGPR2Subrange[PPC_REC_MAX_VIRTUAL_GPR];
	for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++)
	{
		vGPR2Subrange[i] = imlSegment->raInfo.linkedList_perVirtualGPR[i];
#ifndef PUBLIC_RELEASE
		if (vGPR2Subrange[i] && vGPR2Subrange[i]->link_sameVirtualRegisterGPR.next != nullptr)
			assert_dbg();
#endif
	}
	// parse instructions and convert to locations
	sint32 index = 0;
	PPCImlOptimizerUsedRegisters_t gprTracking;
	while (index < imlSegment->imlListCount)
	{
		// end loop at suffix instruction
		if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index))
			break;
		// get accessed GPRs
		PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking);
		// handle accessed GPR
		for (sint32 t = 0; t < 4; t++)
		{
			sint32 virtualRegister = gprTracking.gpr[t];
			if (virtualRegister < 0)
				continue;
			bool isWrite = (t == 3);
			// add location
			PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[virtualRegister], index, isWrite == false, isWrite);
#ifndef PUBLIC_RELEASE
			if (index < vGPR2Subrange[virtualRegister]->start.index)
				assert_dbg();
			if (index + 1 > vGPR2Subrange[virtualRegister]->end.index)
				assert_dbg();
#endif
		}
		// next instruction
		index++;
	}
}

void PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR)
{
	if (_isRangeDefined(imlSegment, vGPR) == false)
	{
		imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_END;
		imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END;
		return;
	}
	imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END;
}

void PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR)
{
	if (_isRangeDefined(imlSegment, vGPR) == false)
	{
		imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START;
		imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_START;
	}
	else
	{
		imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START;
	}
	// propagate backwards
	for (auto& it : imlSegment->list_prevSegments)
	{
		PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, it, vGPR);
	}
}

void _PPCRecRA_connectRanges(ppcImlGenContext_t* ppcImlGenContext, sint32 vGPR, PPCRecImlSegment_t** route, sint32 routeDepth)
{
#ifndef PUBLIC_RELEASE
	if (routeDepth < 2)
		assert_dbg();
#endif
	// extend the starting range to the end of its segment
	PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[0], vGPR);
	// extend all the connecting segments in both directions
	for (sint32 i = 1; i < (routeDepth - 1); i++)
	{
		PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[i], vGPR);
		PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[i], vGPR);
	}
	// extend the final segment towards the beginning
	PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[routeDepth - 1], vGPR);
}

void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR, sint32 distanceLeft, PPCRecImlSegment_t** route, sint32 routeDepth)
{
	if (routeDepth >= 64)
	{
		forceLogDebug_printf("Recompiler RA route maximum depth exceeded for function 0x%08x\n", ppcImlGenContext->functionRef->ppcAddress);
		return;
	}
	route[routeDepth] = currentSegment;
	if (currentSegment->raDistances.reg[vGPR].usageStart == INT_MAX)
	{
		// measure distance to end of segment
		distanceLeft -= currentSegment->imlListCount;
		if (distanceLeft > 0)
		{
			if (currentSegment->nextSegmentBranchNotTaken)
				_PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, distanceLeft, route, routeDepth + 1);
			if (currentSegment->nextSegmentBranchTaken)
				_PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, distanceLeft, route, routeDepth + 1);
		}
		return;
	}
	else
	{
		// measure distance to range
		if (currentSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_END)
		{
			if (distanceLeft < currentSegment->imlListCount)
				return; // range too far away
		}
		else if (currentSegment->raDistances.reg[vGPR].usageStart != RA_INTER_RANGE_START && currentSegment->raDistances.reg[vGPR].usageStart > distanceLeft)
			return; // out of range
		// found close range -> connect ranges
		_PPCRecRA_connectRanges(ppcImlGenContext, vGPR, route, routeDepth + 1);
	}
}

void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR)
{
#ifndef PUBLIC_RELEASE
	if (currentSegment->raDistances.reg[vGPR].usageEnd < 0)
		assert_dbg();
#endif
	// count instructions to the end of the initial segment
	if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_START)
		assert_dbg();
	sint32 instructionsUntilEndOfSeg;
	if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END)
		instructionsUntilEndOfSeg = 0;
	else
		instructionsUntilEndOfSeg = currentSegment->imlListCount - currentSegment->raDistances.reg[vGPR].usageEnd;

#ifndef PUBLIC_RELEASE
	if (instructionsUntilEndOfSeg < 0)
		assert_dbg();
#endif
	sint32 remainingScanDist = 45 - instructionsUntilEndOfSeg; // 45 instructions is the heuristic maximum scan distance
	if (remainingScanDist <= 0)
		return; // can't reach the end

	// also don't forget: extending is easier if we allow 'non-symmetric' branches, e.g. a register range that only enters one branch
	PPCRecImlSegment_t* route[64];
	route[0] = currentSegment;
	if (currentSegment->nextSegmentBranchNotTaken)
	{
		_PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, remainingScanDist, route, 1);
	}
	if (currentSegment->nextSegmentBranchTaken)
	{
		_PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, remainingScanDist, route, 1);
	}
}

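// Illustrative note: the routine above searches up to ~45 instructions of downstream
// control flow for another subrange of the same virtual register and, if one is found,
// bridges every segment on the route so the register stays allocated across the gap
// instead of being stored and reloaded; the 45-instruction limit appears to be a tuning
// heuristic rather than an architectural bound.
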
void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
	for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries
	{
		if (imlSegment->raDistances.reg[i].usageStart == INT_MAX)
			continue; // not used
		// check and extend if possible
		PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, imlSegment, i);
	}
#ifndef PUBLIC_RELEASE
	if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable)
		assert_dbg();
	if ((imlSegment->nextSegmentBranchNotTaken != nullptr || imlSegment->nextSegmentBranchTaken != nullptr) && imlSegment->nextSegmentIsUncertain)
		assert_dbg();
#endif
}

void PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
	std::vector<PPCRecImlSegment_t*> list_segments;
	list_segments.reserve(1000);
	sint32 index = 0;
	imlSegment->raRangeExtendProcessed = true;
	list_segments.push_back(imlSegment);
	while (index < list_segments.size())
	{
		PPCRecImlSegment_t* currentSegment = list_segments[index];
		PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext, currentSegment);
		// follow flow
		if (currentSegment->nextSegmentBranchNotTaken && currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed == false)
		{
			currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed = true;
			list_segments.push_back(currentSegment->nextSegmentBranchNotTaken);
		}
		if (currentSegment->nextSegmentBranchTaken && currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed == false)
		{
			currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed = true;
			list_segments.push_back(currentSegment->nextSegmentBranchTaken);
		}
		index++;
	}
}

void PPCRecRA_mergeCloseRangesV2(ppcImlGenContext_t* ppcImlGenContext)
{
	for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
	{
		PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
		if (imlSegment->list_prevSegments.empty())
		{
			if (imlSegment->raRangeExtendProcessed)
				assert_dbg(); // should not happen
			PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext, imlSegment);
		}
	}
}

void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext)
{
	for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
	{
		PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
		auto localLoopDepth = imlSegment->loopDepth;
		if (localLoopDepth <= 0)
			continue; // not inside a loop
		// look for loop exit
		bool hasLoopExit = false;
		if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->loopDepth < localLoopDepth)
		{
			hasLoopExit = true;
		}
		if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->loopDepth < localLoopDepth)
		{
			hasLoopExit = true;
		}
		if (hasLoopExit == false)
			continue;

		// extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop)
		for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries
		{
			if (imlSegment->raDistances.reg[i].usageEnd != RA_INTER_RANGE_END)
				continue; // range not set or does not reach end of segment
			if (imlSegment->nextSegmentBranchTaken)
				PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, i);
			if (imlSegment->nextSegmentBranchNotTaken)
				PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, i);
		}
	}
}

void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext)
{
	// merge close ranges
	PPCRecRA_mergeCloseRangesV2(ppcImlGenContext);
	// extra pass to move register stores out of loops
	PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext);
	// calculate liveness ranges
	for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
	{
		PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
		PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext, imlSegment);
	}
||||
}
|
||||
|
||||
void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange)
|
||||
{
|
||||
bool isRead = false;
|
||||
bool isWritten = false;
|
||||
bool isOverwritten = false;
|
||||
for (auto& location : subrange->list_locations)
|
||||
{
|
||||
if (location.isRead)
|
||||
{
|
||||
isRead = true;
|
||||
}
|
||||
if (location.isWrite)
|
||||
{
|
||||
if (isRead == false)
|
||||
isOverwritten = true;
|
||||
isWritten = true;
|
||||
}
|
||||
}
|
||||
subrange->_noLoad = isOverwritten;
|
||||
subrange->hasStore = isWritten;
|
||||
|
||||
if (subrange->start.index == RA_INTER_RANGE_START)
|
||||
subrange->_noLoad = true;
|
||||
}
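// Example of the flags derived above: for a subrange whose accesses are
// [write r5, read r5] the first access is a write, so _noLoad = true (the
// previous register value is never observed, no load from the register file is
// needed) and hasStore = true (the result must be written back). A read-only
// subrange keeps _noLoad = false and hasStore = false.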

void _analyzeRangeDataFlow(raLivenessSubrange_t* subrange);

void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext)
{
	// this function is called after _assignRegisters(), which means that all ranges are already final and won't change anymore
	// first do a per-subrange pass
	for (auto& range : ppcImlGenContext->raInfo.list_ranges)
	{
		for (auto& subrange : range->list_subranges)
		{
			PPCRecRA_analyzeSubrangeDataDependencyV2(subrange);
		}
	}
	// then do a second pass where we scan along subrange flow
	for (auto& range : ppcImlGenContext->raInfo.list_ranges)
	{
		for (auto& subrange : range->list_subranges) // todo - traversing this backwards should be faster and yield better results due to the nature of the algorithm
		{
			_analyzeRangeDataFlow(subrange);
		}
	}
}
173
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp
Normal file

@ -0,0 +1,173 @@
#include "PPCRecompiler.h"
|
||||
#include "PPCRecompilerIml.h"
|
||||
|
||||
PPCRecImlSegment_t* PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcOffset)
|
||||
{
|
||||
for(sint32 s=0; s<ppcImlGenContext->segmentListCount; s++)
|
||||
{
|
||||
if( ppcImlGenContext->segmentList[s]->isJumpDestination && ppcImlGenContext->segmentList[s]->jumpDestinationPPCAddress == ppcOffset )
|
||||
{
|
||||
return ppcImlGenContext->segmentList[s];
|
||||
}
|
||||
}
|
||||
debug_printf("PPCRecompiler_getSegmentByPPCJumpAddress(): Unable to find segment (ppcOffset 0x%08x)\n", ppcOffset);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void PPCRecompilerIml_setLinkBranchNotTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst)
|
||||
{
|
||||
// make sure segments aren't already linked
|
||||
if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst)
|
||||
return;
|
||||
// add as next segment for source
|
||||
if (imlSegmentSrc->nextSegmentBranchNotTaken != NULL)
|
||||
assert_dbg();
|
||||
imlSegmentSrc->nextSegmentBranchNotTaken = imlSegmentDst;
|
||||
// add as previous segment for destination
|
||||
imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc);
|
||||
}
|
||||
|
||||
void PPCRecompilerIml_setLinkBranchTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst)
|
||||
{
|
||||
// make sure segments aren't already linked
|
||||
if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst)
|
||||
return;
|
||||
// add as next segment for source
|
||||
if (imlSegmentSrc->nextSegmentBranchTaken != NULL)
|
||||
assert_dbg();
|
||||
imlSegmentSrc->nextSegmentBranchTaken = imlSegmentDst;
|
||||
// add as previous segment for destination
|
||||
imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc);
|
||||
}
|
||||
|
||||
void PPCRecompilerIML_removeLink(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst)
|
||||
{
|
||||
if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst)
|
||||
{
|
||||
imlSegmentSrc->nextSegmentBranchNotTaken = NULL;
|
||||
}
|
||||
else if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst)
|
||||
{
|
||||
imlSegmentSrc->nextSegmentBranchTaken = NULL;
|
||||
}
|
||||
else
|
||||
assert_dbg();
|
||||
|
||||
bool matchFound = false;
|
||||
for (sint32 i = 0; i < imlSegmentDst->list_prevSegments.size(); i++)
|
||||
{
|
||||
if (imlSegmentDst->list_prevSegments[i] == imlSegmentSrc)
|
||||
{
|
||||
imlSegmentDst->list_prevSegments.erase(imlSegmentDst->list_prevSegments.begin()+i);
|
||||
matchFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (matchFound == false)
|
||||
assert_dbg();
|
||||
}
|
||||
|
||||
/*
|
||||
* Replaces all links to segment orig with linkts to segment new
|
||||
*/
|
||||
void PPCRecompilerIML_relinkInputSegment(PPCRecImlSegment_t* imlSegmentOrig, PPCRecImlSegment_t* imlSegmentNew)
|
||||
{
|
||||
while (imlSegmentOrig->list_prevSegments.size() != 0)
|
||||
{
|
||||
PPCRecImlSegment_t* prevSegment = imlSegmentOrig->list_prevSegments[0];
|
||||
if (prevSegment->nextSegmentBranchNotTaken == imlSegmentOrig)
|
||||
{
|
||||
PPCRecompilerIML_removeLink(prevSegment, imlSegmentOrig);
|
||||
PPCRecompilerIml_setLinkBranchNotTaken(prevSegment, imlSegmentNew);
|
||||
}
|
||||
else if (prevSegment->nextSegmentBranchTaken == imlSegmentOrig)
|
||||
{
|
||||
PPCRecompilerIML_removeLink(prevSegment, imlSegmentOrig);
|
||||
PPCRecompilerIml_setLinkBranchTaken(prevSegment, imlSegmentNew);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert_dbg();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
for(sint32 s=0; s<ppcImlGenContext->segmentListCount; s++)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
|
||||
|
||||
bool isLastSegment = (s+1)>=ppcImlGenContext->segmentListCount;
|
||||
PPCRecImlSegment_t* nextSegment = isLastSegment?NULL:ppcImlGenContext->segmentList[s+1];
|
||||
// handle empty segment
|
||||
if( imlSegment->imlListCount == 0 )
|
||||
{
|
||||
if (isLastSegment == false)
|
||||
PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList[s+1]); // continue execution to next segment
|
||||
else
|
||||
imlSegment->nextSegmentIsUncertain = true;
|
||||
continue;
|
||||
}
|
||||
// check last instruction of segment
|
||||
PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+(imlSegment->imlListCount-1);
|
||||
if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK )
|
||||
{
|
||||
// find destination segment by ppc jump address
|
||||
PPCRecImlSegment_t* jumpDestSegment = PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, imlInstruction->op_conditionalJump.jumpmarkAddress);
|
||||
if( jumpDestSegment )
|
||||
{
|
||||
if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE)
|
||||
PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, nextSegment);
|
||||
PPCRecompilerIml_setLinkBranchTaken(imlSegment, jumpDestSegment);
|
||||
}
|
||||
else
|
||||
{
|
||||
imlSegment->nextSegmentIsUncertain = true;
|
||||
}
|
||||
}
|
||||
else if( imlInstruction->type == PPCREC_IML_TYPE_MACRO )
|
||||
{
|
||||
// currently we assume that the next segment is unknown for all macros
|
||||
imlSegment->nextSegmentIsUncertain = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// all other instruction types do not branch
|
||||
//imlSegment->nextSegment[0] = nextSegment;
|
||||
PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, nextSegment);
|
||||
//imlSegment->nextSegmentIsUncertain = true;
|
||||
}
|
||||
}
|
||||
}
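// Example of the linking rules above: a segment ending in a conditional CJUMP
// gets two successors (branch-taken = the jump destination segment,
// branch-not-taken = the following segment), an unconditional CJUMP only gets
// the taken link, and a MACRO marks the successor as uncertain.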

void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext)
{
	sint32 initialSegmentCount = ppcImlGenContext->segmentListCount;
	for (sint32 i = 0; i < ppcImlGenContext->segmentListCount; i++)
	{
		PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[i];
		if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable)
		{
			// spawn new segment at end
			PPCRecompilerIml_insertSegments(ppcImlGenContext, ppcImlGenContext->segmentListCount, 1);
			PPCRecImlSegment_t* entrySegment = ppcImlGenContext->segmentList[ppcImlGenContext->segmentListCount-1];
			entrySegment->isEnterable = true;
			entrySegment->enterPPCAddress = imlSegment->enterPPCAddress;
			// create jump instruction
			PPCRecompiler_pushBackIMLInstructions(entrySegment, 0, 1);
			PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, entrySegment->imlList + 0);
			PPCRecompilerIml_setLinkBranchTaken(entrySegment, imlSegment);
			// remove enterable flag from original segment
			imlSegment->isEnterable = false;
			imlSegment->enterPPCAddress = 0;
		}
	}
}

PPCRecImlInstruction_t* PPCRecompilerIML_getLastInstruction(PPCRecImlSegment_t* imlSegment)
{
	if (imlSegment->imlListCount == 0)
		return nullptr;
	return imlSegment->imlList + (imlSegment->imlListCount - 1);
}
2682
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp
Normal file
File diff suppressed because it is too large
332
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h
Normal file

@ -0,0 +1,332 @@
typedef struct
{
	uint32 offset;
	uint8 type;
	void* extraInfo;
}x64RelocEntry_t;

typedef struct
{
	uint8* codeBuffer;
	sint32 codeBufferIndex;
	sint32 codeBufferSize;
	// cr state
	sint32 activeCRRegister; // current x86 condition flags reflect this cr* register
	sint32 activeCRState; // describes the way in which x86 flags map to the cr register (signed / unsigned)
	// relocate offsets
	x64RelocEntry_t* relocateOffsetTable;
	sint32 relocateOffsetTableSize;
	sint32 relocateOffsetTableCount;
}x64GenContext_t;
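// Minimal usage sketch (illustrative only; the buffer size and the -1 sentinel
// are assumptions, not values taken from this codebase):
//
//   x64GenContext_t ctx{};
//   ctx.codeBufferSize = 4096;
//   ctx.codeBuffer = (uint8*)malloc(ctx.codeBufferSize);
//   ctx.codeBufferIndex = 0;   // next write position for x64Gen_writeU8/U16/U32
//   ctx.activeCRRegister = -1; // assumed sentinel: no cr register cached in x86 flags
//   x64Gen_ret(&ctx);          // emits C3 at codeBuffer[0]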

// Some of these are defined by winnt.h and gnu headers
#undef REG_EAX
#undef REG_ECX
#undef REG_EDX
#undef REG_EBX
#undef REG_ESP
#undef REG_EBP
#undef REG_ESI
#undef REG_EDI
#undef REG_NONE
#undef REG_RAX
#undef REG_RCX
#undef REG_RDX
#undef REG_RBX
#undef REG_RSP
#undef REG_RBP
#undef REG_RSI
#undef REG_RDI
#undef REG_R8
#undef REG_R9
#undef REG_R10
#undef REG_R11
#undef REG_R12
#undef REG_R13
#undef REG_R14
#undef REG_R15

#define REG_EAX 0
#define REG_ECX 1
#define REG_EDX 2
#define REG_EBX 3
#define REG_ESP 4 // reserved for low half of hCPU pointer
#define REG_EBP 5
#define REG_ESI 6
#define REG_EDI 7
#define REG_NONE -1

#define REG_RAX 0
#define REG_RCX 1
#define REG_RDX 2
#define REG_RBX 3
#define REG_RSP 4 // reserved for hCPU pointer
#define REG_RBP 5
#define REG_RSI 6
#define REG_RDI 7
#define REG_R8 8
#define REG_R9 9
#define REG_R10 10
#define REG_R11 11
#define REG_R12 12
#define REG_R13 13 // reserved to hold pointer to memory base? (Not decided yet)
#define REG_R14 14 // reserved as temporary register
#define REG_R15 15 // reserved for pointer to ppcRecompilerInstanceData

#define REG_AL 0
#define REG_CL 1
#define REG_DL 2
#define REG_BL 3
#define REG_AH 4
#define REG_CH 5
#define REG_DH 6
#define REG_BH 7

// reserved registers
#define REG_RESV_TEMP (REG_R14)
#define REG_RESV_HCPU (REG_RSP)
#define REG_RESV_MEMBASE (REG_R13)
#define REG_RESV_RECDATA (REG_R15)

// reserved floating-point registers
#define REG_RESV_FPR_TEMP (15)

extern sint32 x64Gen_registerMap[12];

#define tempToRealRegister(__x) (x64Gen_registerMap[__x])
#define tempToRealFPRRegister(__x) (__x)
#define reg32ToReg16(__x) (__x)

enum
{
	X86_CONDITION_EQUAL, // or zero
	X86_CONDITION_NOT_EQUAL, // or not zero
	X86_CONDITION_SIGNED_LESS, // or not greater/equal
	X86_CONDITION_SIGNED_GREATER, // or not less/equal
	X86_CONDITION_SIGNED_LESS_EQUAL, // or not greater
	X86_CONDITION_SIGNED_GREATER_EQUAL, // or not less
	X86_CONDITION_UNSIGNED_BELOW, // or not above/equal
	X86_CONDITION_UNSIGNED_ABOVE, // or not below/equal
	X86_CONDITION_UNSIGNED_BELOW_EQUAL, // or not above
	X86_CONDITION_UNSIGNED_ABOVE_EQUAL, // or not below
	X86_CONDITION_CARRY, // carry flag must be set
	X86_CONDITION_NOT_CARRY, // carry flag must not be set
	X86_CONDITION_SIGN, // sign flag must be set
	X86_CONDITION_NOT_SIGN, // sign flag must not be set
	X86_CONDITION_PARITY, // parity flag must be set
	X86_CONDITION_NONE, // no condition, jump always
};

#define PPCREC_CR_TEMPORARY (8) // never stored
#define PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC (0) // for unsigned arithmetic operations (ADD, CMPI)
#define PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC (1) // for signed arithmetic operations (ADD, CMPI)
#define PPCREC_CR_STATE_TYPE_LOGICAL (2) // for unsigned operations (CMPLI)

#define X86_RELOC_MAKE_RELATIVE (0) // make code imm relative to instruction
#define X64_RELOC_LINK_TO_PPC (1) // translate from ppc address to x86 offset
#define X64_RELOC_LINK_TO_SEGMENT (2) // link to beginning of segment

#define PPC_X64_GPR_USABLE_REGISTERS (16-4)
#define PPC_X64_FPR_USABLE_REGISTERS (16-1) // Use XMM0 - XMM14, XMM15 is the temp register

bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext);

void PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext);

void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset);

void PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions();

void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed);
bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed);

void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);

// ASM gen
void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v);
void x64Gen_writeU16(x64GenContext_t* x64GenContext, uint32 v);
void x64Gen_writeU32(x64GenContext_t* x64GenContext, uint32 v);

void x64Emit_mov_reg32_mem32(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memOffset);
void x64Emit_mov_mem32_reg32(x64GenContext_t* x64GenContext, sint32 memBaseReg64, sint32 memOffset, sint32 srcReg);
void x64Emit_mov_mem64_reg64(x64GenContext_t* x64GenContext, sint32 memBaseReg64, sint32 memOffset, sint32 srcReg);
void x64Emit_mov_reg64_mem64(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memOffset);
void x64Emit_mov_reg64_mem32(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memOffset);
void x64Emit_mov_mem32_reg64(x64GenContext_t* x64GenContext, sint32 memBaseReg64, sint32 memOffset, sint32 srcReg);
void x64Emit_mov_reg64_mem64(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memIndexReg64, sint32 memOffset);
void x64Emit_mov_reg32_mem32(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memIndexReg64, sint32 memOffset);
void x64Emit_mov_reg64b_mem8(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memIndexReg64, sint32 memOffset);
void x64Emit_movZX_reg32_mem8(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memIndexReg64, sint32 memOffset);
void x64Emit_movZX_reg64_mem8(x64GenContext_t* x64GenContext, sint32 destReg, sint32 memBaseReg64, sint32 memOffset);

void x64Gen_movSignExtend_reg64Low32_mem8Reg64PlusReg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);

void x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
void x64Gen_mov_mem64Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
void x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
void x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
void x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
void x64Gen_mov_mem32Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 memImmU32, uint32 dataImmU32);
void x64Gen_mov_mem64Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 memImmU32, uint32 dataImmU32);
void x64Gen_mov_mem8Reg64_imm8(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 memImmU32, uint8 dataImmU8);

void x64Gen_mov_reg64_imm64(x64GenContext_t* x64GenContext, sint32 destRegister, uint64 immU64);
void x64Gen_mov_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 destRegister, uint64 immU32);
void x64Gen_mov_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);

void x64Gen_lea_reg64Low32_reg64Low32PlusReg64Low32(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64);

void x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, uint32 conditionType, sint32 destRegister, sint32 srcRegister);
void x64Gen_mov_reg64_reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_xchg_reg64_reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_movZeroExtend_reg64Low32_reg64Low16(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_movZeroExtend_reg64Low32_reg64Low8(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);

void x64Gen_or_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32);
void x64Gen_and_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32);
void x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32);

void x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
void x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegister64, sint32 memImmS32, sint32 srcRegister);

void x64Gen_add_reg64_reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_add_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_add_reg64_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_add_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_sub_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_sub_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_sub_reg64_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_sub_mem32reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegister, sint32 memImmS32, uint64 immU32);
void x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_adc_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_adc_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_dec_mem32(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint32 memoryImmU32);
void x64Gen_imul_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 operandRegister);
void x64Gen_idiv_reg64Low32(x64GenContext_t* x64GenContext, sint32 operandRegister);
void x64Gen_div_reg64Low32(x64GenContext_t* x64GenContext, sint32 operandRegister);
void x64Gen_imul_reg64Low32(x64GenContext_t* x64GenContext, sint32 operandRegister);
void x64Gen_mul_reg64Low32(x64GenContext_t* x64GenContext, sint32 operandRegister);
void x64Gen_and_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_and_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_test_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_test_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_cmp_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, sint32 immS32);
void x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 memRegister, sint32 memImmS32);
void x64Gen_or_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_or_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_xor_reg32_reg32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_xor_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_xor_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);

void x64Gen_rol_reg64Low32_imm8(x64GenContext_t* x64GenContext, sint32 srcRegister, sint8 immS8);
void x64Gen_rol_reg64Low32_cl(x64GenContext_t* x64GenContext, sint32 srcRegister);
void x64Gen_rol_reg64Low16_imm8(x64GenContext_t* x64GenContext, sint32 srcRegister, sint8 immS8);
void x64Gen_rol_reg64_imm8(x64GenContext_t* x64GenContext, sint32 srcRegister, sint8 immS8);
void x64Gen_shl_reg64Low32_imm8(x64GenContext_t* x64GenContext, sint32 srcRegister, sint8 immS8);
void x64Gen_shr_reg64Low32_imm8(x64GenContext_t* x64GenContext, sint32 srcRegister, sint8 immS8);
void x64Gen_sar_reg64Low32_imm8(x64GenContext_t* x64GenContext, sint32 srcRegister, sint8 immS8);

void x64Gen_not_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister);
void x64Gen_neg_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister);
void x64Gen_cdq(x64GenContext_t* x64GenContext);

void x64Gen_bswap_reg64(x64GenContext_t* x64GenContext, sint32 destRegister);
void x64Gen_bswap_reg64Lower32bit(x64GenContext_t* x64GenContext, sint32 destRegister);
void x64Gen_bswap_reg64Lower16bit(x64GenContext_t* x64GenContext, sint32 destRegister);

void x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_setcc_mem8(x64GenContext_t* x64GenContext, sint32 conditionType, sint32 memoryRegister, uint32 memoryImmU32);
void x64Gen_setcc_reg64b(x64GenContext_t* x64GenContext, sint32 conditionType, sint32 dataRegister);
void x64Gen_bt_mem8(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint32 memoryImmU32, uint8 bitIndex);
void x64Gen_cmc(x64GenContext_t* x64GenContext);

void x64Gen_jmp_imm32(x64GenContext_t* x64GenContext, uint32 destImm32);
void x64Gen_jmp_memReg64(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 immU32);
void x64Gen_jmpc_far(x64GenContext_t* x64GenContext, sint32 conditionType, sint32 relativeDest);
void x64Gen_jmpc_near(x64GenContext_t* x64GenContext, sint32 conditionType, sint32 relativeDest);

void x64Gen_push_reg64(x64GenContext_t* x64GenContext, sint32 srcRegister);
void x64Gen_pop_reg64(x64GenContext_t* x64GenContext, sint32 destRegister);
void x64Gen_jmp_reg64(x64GenContext_t* x64GenContext, sint32 srcRegister);
void x64Gen_call_reg64(x64GenContext_t* x64GenContext, sint32 srcRegister);
void x64Gen_ret(x64GenContext_t* x64GenContext);
void x64Gen_int3(x64GenContext_t* x64GenContext);

// floating-point (SIMD/SSE) gen
void x64Gen_movaps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSource);
void x64Gen_movupd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_movupd_memReg128_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_movddup_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_movhlps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_movsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc, uint8 imm8);
void x64Gen_addsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_addpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_subsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_subpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_mulsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_mulpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_mulpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_divsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_divpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_comisd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 memoryReg, sint32 memImmS32);
void x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_comiss_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 memoryReg, sint32 memImmS32);
void x64Gen_orps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32);
void x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32);
void x64Gen_andps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32);
void x64Gen_andpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_andps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32);
void x64Gen_cvttpd2dq_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc);
void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_cvtpi2pd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 memReg, sint32 memImmS32);
void x64Gen_cvtsd2si_reg64Low_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc);
void x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc);
void x64Gen_sqrtsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_sqrtpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_rcpss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_mulss_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);

void x64Gen_movd_xmmReg_reg64Low32(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc);
void x64Gen_movd_reg64Low32_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc);
void x64Gen_movq_xmmReg_reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc);
void x64Gen_movq_reg64_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 xmmRegisterSrc);

// AVX

void x64Gen_avx_VPUNPCKHQDQ_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB);
void x64Gen_avx_VUNPCKHPD_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB);
void x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB);

// BMI
void x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
void x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);

void x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);

void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
49
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp
Normal file

@ -0,0 +1,49 @@
#include "PPCRecompiler.h"
|
||||
#include "PPCRecompilerX64.h"
|
||||
|
||||
void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
|
||||
|
||||
void _x64Gen_vex128_nds(x64GenContext_t* x64GenContext, uint8 opcodeMap, uint8 additionalOperand, uint8 pp, uint8 vex_ext, uint8 vex_r, uint8 vex_b, uint8 opcode)
|
||||
{
|
||||
if(vex_b != 0)
|
||||
x64Gen_writeU8(x64GenContext, 0xC4); // three byte VEX
|
||||
else
|
||||
x64Gen_writeU8(x64GenContext, 0xC5); // two byte VEX
|
||||
|
||||
if (vex_b != 0)
|
||||
{
|
||||
uint8 vex_x = 0;
|
||||
x64Gen_writeU8(x64GenContext, (vex_r ? 0x00 : 0x80) | (vex_x ? 0x00 : 0x40) | (vex_b ? 0x00 : 0x20) | 1);
|
||||
}
|
||||
|
||||
x64Gen_writeU8(x64GenContext, (vex_ext<<7) | (((~additionalOperand)&0xF)<<3) | pp);
|
||||
|
||||
x64Gen_writeU8(x64GenContext, opcode);
|
||||
}
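// Worked example: x64Gen_avx_VSUBPD_xmm_xmm_xmm(ctx, 1, 2, 3) below reaches this
// helper with additionalOperand=2 (xmm2 as second source), pp=VEX_PP_66_0F and
// vex_b=0, so the two-byte VEX form is used and the emitted bytes are
//   C5 E9 5C CB  =  vsubpd xmm1, xmm2, xmm3
// (0xE9 = vex_ext<<7 | ((~2)&0xF)<<3 | pp, and 0xCB = 0xC0 + srcRegisterB + dstRegister*8).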

#define VEX_PP_0F 0 // guessed
#define VEX_PP_66_0F 1
#define VEX_PP_F3_0F 2 // guessed
#define VEX_PP_F2_0F 3 // guessed

void x64Gen_avx_VPUNPCKHQDQ_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB)
{
	_x64Gen_vex128_nds(x64GenContext, 0, srcRegisterA, VEX_PP_66_0F, dstRegister < 8 ? 1 : 0, (dstRegister >= 8 && srcRegisterB >= 8) ? 1 : 0, srcRegisterB < 8 ? 0 : 1, 0x6D);

	x64Gen_writeU8(x64GenContext, 0xC0 + (srcRegisterB & 7) + (dstRegister & 7) * 8);
}

void x64Gen_avx_VUNPCKHPD_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB)
{
	_x64Gen_vex128_nds(x64GenContext, 0, srcRegisterA, VEX_PP_66_0F, dstRegister < 8 ? 1 : 0, (dstRegister >= 8 && srcRegisterB >= 8) ? 1 : 0, srcRegisterB < 8 ? 0 : 1, 0x15);

	x64Gen_writeU8(x64GenContext, 0xC0 + (srcRegisterB & 7) + (dstRegister & 7) * 8);
}

void x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB)
{
	_x64Gen_vex128_nds(x64GenContext, 0, srcRegisterA, VEX_PP_66_0F, dstRegister < 8 ? 1 : 0, (dstRegister >= 8 && srcRegisterB >= 8) ? 1 : 0, srcRegisterB < 8 ? 0 : 1, 0x5C);

	x64Gen_writeU8(x64GenContext, 0xC0 + (srcRegisterB & 7) + (dstRegister & 7) * 8);
}
80
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp
Normal file

@ -0,0 +1,80 @@
#include "PPCRecompiler.h"
|
||||
#include "PPCRecompilerX64.h"
|
||||
|
||||
void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
|
||||
|
||||
void x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32)
|
||||
{
|
||||
// MOVBE <dstReg64> (low dword), DWORD [<reg64> + <reg64> + <imm64>]
|
||||
if( dstRegister >= 8 && memRegisterA64 >= 8 && memRegisterB64 >= 8 )
|
||||
x64Gen_writeU8(x64GenContext, 0x47);
|
||||
else if( memRegisterA64 >= 8 && memRegisterB64 >= 8 )
|
||||
x64Gen_writeU8(x64GenContext, 0x43);
|
||||
else if( dstRegister >= 8 && memRegisterB64 >= 8 )
|
||||
x64Gen_writeU8(x64GenContext, 0x42);
|
||||
else if( dstRegister >= 8 && memRegisterA64 >= 8 )
|
||||
x64Gen_writeU8(x64GenContext, 0x45);
|
||||
else if( dstRegister >= 8 )
|
||||
x64Gen_writeU8(x64GenContext, 0x44);
|
||||
else if( memRegisterA64 >= 8 )
|
||||
x64Gen_writeU8(x64GenContext, 0x41);
|
||||
else if( memRegisterB64 >= 8 )
|
||||
x64Gen_writeU8(x64GenContext, 0x42);
|
||||
|
||||
x64Gen_writeU8(x64GenContext, 0x0F);
|
||||
x64Gen_writeU8(x64GenContext, 0x38);
|
||||
x64Gen_writeU8(x64GenContext, 0xF0);
|
||||
_x64Gen_writeMODRMDeprecated(x64GenContext, dstRegister, memRegisterA64, memRegisterB64, memImmS32);
|
||||
}
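// The prefix ladder above assembles an x86-64 REX byte (0x40 | R<<2 | X<<1 | B):
// dstRegister >= 8 contributes REX.R (0x04, extending the ModRM reg field),
// memRegisterB64 >= 8 contributes REX.X (0x02, the index register) and
// memRegisterA64 >= 8 contributes REX.B (0x01, the base register), e.g. r9 as
// destination with both memory registers below r8 yields the 0x44 case.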

void x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32)
{
	// MOVBE <dstReg64> (low word), WORD [<reg64> + <reg64> + <imm64>]
	// note: Unlike the 32bit version this instruction does not set the upper 32bits of the 64bit register to 0
	x64Gen_writeU8(x64GenContext, 0x66); // 16bit prefix
	x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, dstRegister, memRegisterA64, memRegisterB64, memImmS32);
}

void x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister)
{
	// MOVBE DWORD [<reg64> + <reg64> + <imm64>], <srcReg64> (low dword)
	if( srcRegister >= 8 && memRegisterA64 >= 8 && memRegisterB64 >= 8 )
		x64Gen_writeU8(x64GenContext, 0x47);
	else if( memRegisterA64 >= 8 && memRegisterB64 >= 8 )
		x64Gen_writeU8(x64GenContext, 0x43);
	else if( srcRegister >= 8 && memRegisterB64 >= 8 )
		x64Gen_writeU8(x64GenContext, 0x46);
	else if( srcRegister >= 8 && memRegisterA64 >= 8 )
		x64Gen_writeU8(x64GenContext, 0x45);
	else if( srcRegister >= 8 )
		x64Gen_writeU8(x64GenContext, 0x44);
	else if( memRegisterA64 >= 8 )
		x64Gen_writeU8(x64GenContext, 0x41);
	else if( memRegisterB64 >= 8 )
		x64Gen_writeU8(x64GenContext, 0x42);

	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x38);
	x64Gen_writeU8(x64GenContext, 0xF1);
	_x64Gen_writeMODRMDeprecated(x64GenContext, srcRegister, memRegisterA64, memRegisterB64, memImmS32);
}

void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
{
	// SHRX reg64, reg64, reg64
	x64Gen_writeU8(x64GenContext, 0xC4);
	x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 0x20 : 0));
	x64Gen_writeU8(x64GenContext, 0xFB - registerB * 8);
	x64Gen_writeU8(x64GenContext, 0xF7);
	x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7));
}
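// Worked example: x64Gen_shrx_reg64_reg64_reg64(ctx, REG_RAX, REG_RCX, REG_RDX)
// emits C4 E2 EB F7 C1 (shrx rax, rcx, rdx): 0xE2 selects the 0F38 opcode map,
// 0xEB packs W=1, the inverted shift-count register (rdx) and the F2 prefix,
// and 0xC1 is the ModRM byte for reg=rax, rm=rcx.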

void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
{
	// SHLX reg64, reg64, reg64
	x64Gen_writeU8(x64GenContext, 0xC4);
	x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 0x20 : 0));
	x64Gen_writeU8(x64GenContext, 0xF9 - registerB * 8);
	x64Gen_writeU8(x64GenContext, 0xF7);
	x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7));
}
1244
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp
Normal file
File diff suppressed because it is too large
1885
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp
Normal file
File diff suppressed because it is too large
752
src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp
Normal file

@ -0,0 +1,752 @@
#include "PPCRecompiler.h"
|
||||
#include "PPCRecompilerIml.h"
|
||||
#include "PPCRecompilerX64.h"
|
||||
|
||||
void x64Gen_genSSEVEXPrefix2(x64GenContext_t* x64GenContext, sint32 xmmRegister1, sint32 xmmRegister2, bool use64BitMode)
|
||||
{
|
||||
if( xmmRegister1 < 8 && xmmRegister2 < 8 && use64BitMode == false )
|
||||
return;
|
||||
uint8 v = 0x40;
|
||||
if( xmmRegister1 >= 8 )
|
||||
v |= 0x01;
|
||||
if( xmmRegister2 >= 8 )
|
||||
v |= 0x04;
|
||||
if( use64BitMode )
|
||||
v |= 0x08;
|
||||
x64Gen_writeU8(x64GenContext, v);
|
||||
}
|
||||
|
||||
void x64Gen_genSSEVEXPrefix1(x64GenContext_t* x64GenContext, sint32 xmmRegister, bool use64BitMode)
|
||||
{
|
||||
if( xmmRegister < 8 && use64BitMode == false )
|
||||
return;
|
||||
uint8 v = 0x40;
|
||||
if( use64BitMode )
|
||||
v |= 0x01;
|
||||
if( xmmRegister >= 8 )
|
||||
v |= 0x04;
|
||||
x64Gen_writeU8(x64GenContext, v);
|
||||
}
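// Note: despite the "VEX" naming, these two helpers emit a classic REX prefix
// (0x40 plus the W/R/X/B extension bits) for legacy SSE encodings. For example,
// x64Gen_movaps_xmmReg_xmmReg below with destination xmm9 and source xmm1
// writes 44 0F 28 C9 (movaps xmm9, xmm1), where 0x44 is REX.R extending the
// ModRM reg field to reach xmm9.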
|
||||
|
||||
void x64Gen_movaps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSource)
|
||||
{
|
||||
// SSE
|
||||
// copy xmm register
|
||||
// MOVAPS <xmm>, <xmm>
|
||||
x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSource, xmmRegisterDest, false); // tested
|
||||
x64Gen_writeU8(x64GenContext, 0x0F);
|
||||
x64Gen_writeU8(x64GenContext, 0x28); // alternative encoding: 0x29, source and destination register are exchanged
|
||||
x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSource&7));
|
||||
}
|
||||
|
||||
void x64Gen_movupd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
|
||||
{
|
||||
// SSE2
|
||||
// move two doubles from memory into xmm register
|
||||
// MOVUPD <xmm>, [<reg>+<imm>]
|
||||
if( memRegister == REG_ESP )
|
||||
{
|
||||
// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
|
||||
// 66 0F 10 84 E4 23 01 00 00
|
||||
x64Gen_writeU8(x64GenContext, 0x66);
|
||||
x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegister, false);
|
||||
x64Gen_writeU8(x64GenContext, 0x0F);
|
||||
x64Gen_writeU8(x64GenContext, 0x10);
|
||||
x64Gen_writeU8(x64GenContext, 0x84+(xmmRegister&7)*8);
|
||||
x64Gen_writeU8(x64GenContext, 0xE4);
|
||||
x64Gen_writeU32(x64GenContext, memImmU32);
|
||||
}
|
||||
else if( memRegister == REG_NONE )
|
||||
{
|
||||
assert_dbg();
|
||||
//x64Gen_writeU8(x64GenContext, 0x66);
|
||||
//x64Gen_writeU8(x64GenContext, 0x0F);
|
||||
//x64Gen_writeU8(x64GenContext, 0x10);
|
||||
//x64Gen_writeU8(x64GenContext, 0x05+(xmmRegister&7)*8);
|
||||
//x64Gen_writeU32(x64GenContext, memImmU32);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert_dbg();
|
||||
}
|
||||
}
|
||||
|
||||
void x64Gen_movupd_memReg128_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
|
||||
{
|
||||
// SSE2
|
||||
// move two doubles from memory into xmm register
|
||||
// MOVUPD [<reg>+<imm>], <xmm>
|
||||
if( memRegister == REG_ESP )
|
||||
{
|
||||
// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
|
||||
x64Gen_writeU8(x64GenContext, 0x66);
|
||||
x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegister, false);
|
||||
x64Gen_writeU8(x64GenContext, 0x0F);
|
||||
x64Gen_writeU8(x64GenContext, 0x11);
|
||||
x64Gen_writeU8(x64GenContext, 0x84+(xmmRegister&7)*8);
|
||||
x64Gen_writeU8(x64GenContext, 0xE4);
|
||||
x64Gen_writeU32(x64GenContext, memImmU32);
|
||||
}
|
||||
else if( memRegister == REG_NONE )
|
||||
{
|
||||
assert_dbg();
|
||||
//x64Gen_writeU8(x64GenContext, 0x66);
|
||||
//x64Gen_writeU8(x64GenContext, 0x0F);
|
||||
//x64Gen_writeU8(x64GenContext, 0x11);
|
||||
//x64Gen_writeU8(x64GenContext, 0x05+(xmmRegister&7)*8);
|
||||
//x64Gen_writeU32(x64GenContext, memImmU32);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert_dbg();
|
||||
}
|
||||
}
|
||||
|
||||
void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
|
||||
{
|
||||
// SSE3
|
||||
// move one double from memory into lower and upper half of a xmm register
|
||||
if( memRegister == REG_RSP )
|
||||
{
|
||||
// MOVDDUP <xmm>, [<reg>+<imm>]
|
||||
// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
|
||||
x64Gen_writeU8(x64GenContext, 0xF2);
|
||||
if( xmmRegister >= 8 )
|
||||
x64Gen_writeU8(x64GenContext, 0x44);
|
||||
x64Gen_writeU8(x64GenContext, 0x0F);
|
||||
x64Gen_writeU8(x64GenContext, 0x12);
|
||||
x64Gen_writeU8(x64GenContext, 0x84+(xmmRegister&7)*8);
|
||||
x64Gen_writeU8(x64GenContext, 0xE4);
|
||||
x64Gen_writeU32(x64GenContext, memImmU32);
|
||||
}
|
||||
else if( memRegister == REG_R15 )
|
||||
{
|
||||
// MOVDDUP <xmm>, [<reg>+<imm>]
|
||||
// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
|
||||
// F2 41 0F 12 87 - 44 33 22 11
|
||||
x64Gen_writeU8(x64GenContext, 0xF2);
|
||||
x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegister, true);
|
||||
x64Gen_writeU8(x64GenContext, 0x0F);
|
||||
x64Gen_writeU8(x64GenContext, 0x12);
|
||||
x64Gen_writeU8(x64GenContext, 0x87+(xmmRegister&7)*8);
|
||||
x64Gen_writeU32(x64GenContext, memImmU32);
|
||||
}
|
||||
else if( memRegister == REG_NONE )
|
||||
{
|
||||
// MOVDDUP <xmm>, [<imm>]
|
||||
// 36 F2 0F 12 05 - 00 00 00 00
|
||||
assert_dbg();
|
||||
//x64Gen_writeU8(x64GenContext, 0x36);
|
||||
//x64Gen_writeU8(x64GenContext, 0xF2);
|
||||
//x64Gen_writeU8(x64GenContext, 0x0F);
|
||||
//x64Gen_writeU8(x64GenContext, 0x12);
|
||||
//x64Gen_writeU8(x64GenContext, 0x05+(xmmRegister&7)*8);
|
||||
//x64Gen_writeU32(x64GenContext, memImmU32);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert_dbg();
|
||||
}
|
||||
}
|
||||
|
||||
void x64Gen_movddup_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
|
||||
{
|
||||
// SSE3
|
||||
// move low double from xmm register into lower and upper half of a different xmm register
|
||||
x64Gen_writeU8(x64GenContext, 0xF2);
|
||||
x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
|
||||
x64Gen_writeU8(x64GenContext, 0x0F);
|
||||
x64Gen_writeU8(x64GenContext, 0x12);
|
||||
x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
|
||||
}
|
||||
|
||||
void x64Gen_movhlps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
|
||||
{
|
||||
// SSE1
|
||||
// move high double from xmm register into lower and upper half of a different xmm register
|
||||
x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
|
||||
x64Gen_writeU8(x64GenContext, 0x0F);
|
||||
x64Gen_writeU8(x64GenContext, 0x12);
|
||||
x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
|
||||
}
|
||||
|
||||
void x64Gen_movsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
|
||||
{
|
||||
// SSE2
|
||||
// move lower double from xmm register into lower half of a different xmm register, leave other half untouched
|
||||
x64Gen_writeU8(x64GenContext, 0xF2);
|
||||
x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
|
||||
x64Gen_writeU8(x64GenContext, 0x0F);
|
||||
x64Gen_writeU8(x64GenContext, 0x10); // alternative encoding: 0x11, src and dest exchanged
|
||||
x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
|
||||
}
|
||||
|
||||
void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
|
||||
{
|
||||
// SSE2
|
||||
// move lower 64bits (double) of xmm register to memory location
|
||||
if( memRegister == REG_NONE )
|
||||
{
|
||||
// MOVSD [<imm>], <xmm>
|
||||
// F2 0F 11 05 - 45 23 01 00
|
||||
assert_dbg();
|
||||
//x64Gen_writeU8(x64GenContext, 0xF2);
|
||||
//x64Gen_genSSEVEXPrefix(x64GenContext, xmmRegister, 0, false);
|
||||
//x64Gen_writeU8(x64GenContext, 0x0F);
|
||||
//x64Gen_writeU8(x64GenContext, 0x11);
|
||||
//x64Gen_writeU8(x64GenContext, 0x05+xmmRegister*8);
|
||||
//x64Gen_writeU32(x64GenContext, memImmU32);
|
||||
}
|
||||
else if( memRegister == REG_RSP )
|
||||
{
|
||||
// MOVSD [RSP+<imm>], <xmm>
|
||||
// F2 0F 11 84 24 - 33 22 11 00
|
||||
x64Gen_writeU8(x64GenContext, 0xF2);
|
||||
x64Gen_genSSEVEXPrefix2(x64GenContext, 0, xmmRegister, false);
|
||||
x64Gen_writeU8(x64GenContext, 0x0F);
|
||||
x64Gen_writeU8(x64GenContext, 0x11);
|
||||
x64Gen_writeU8(x64GenContext, 0x84+(xmmRegister&7)*8);
|
||||
x64Gen_writeU8(x64GenContext, 0x24);
|
||||
x64Gen_writeU32(x64GenContext, memImmU32);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert_dbg();
|
||||
}
|
||||
}
|
||||
|
||||
void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
|
||||
{
|
||||
// SSE3
|
||||
// move one double from memory into lower half of a xmm register, leave upper half unchanged(?)
|
||||
if( memRegister == REG_NONE )
|
||||
{
|
||||
// MOVLPD <xmm>, [<imm>]
|
||||
//x64Gen_writeU8(x64GenContext, 0x66);
|
||||
//x64Gen_writeU8(x64GenContext, 0x0F);
|
||||
//x64Gen_writeU8(x64GenContext, 0x12);
|
||||
//x64Gen_writeU8(x64GenContext, 0x05+(xmmRegister&7)*8);
|
||||
//x64Gen_writeU32(x64GenContext, memImmU32);
|
||||
assert_dbg();
|
||||
}
|
||||
else if( memRegister == REG_RSP )
|
||||
{
|
||||
// MOVLPD <xmm>, [<reg64>+<imm>]
|
||||
// 66 0F 12 84 24 - 33 22 11 00
|
||||
x64Gen_writeU8(x64GenContext, 0x66);
|
||||
x64Gen_genSSEVEXPrefix2(x64GenContext, 0, xmmRegister, false);
|
||||
x64Gen_writeU8(x64GenContext, 0x0F);
|
||||
x64Gen_writeU8(x64GenContext, 0x12);
|
||||
x64Gen_writeU8(x64GenContext, 0x84+(xmmRegister&7)*8);
|
||||
x64Gen_writeU8(x64GenContext, 0x24);
|
||||
x64Gen_writeU32(x64GenContext, memImmU32);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert_dbg();
|
||||
}
|
||||
}
|
||||
|
||||
void x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
|
||||
{
|
||||
// SSE2
|
||||
x64Gen_writeU8(x64GenContext, 0x66);
|
||||
x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
|
||||
x64Gen_writeU8(x64GenContext, 0x0F);
|
||||
x64Gen_writeU8(x64GenContext, 0x14);
|
||||
x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
|
||||
}
|
||||
|
||||
void x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
|
||||
{
|
||||
// SSE2
|
||||
x64Gen_writeU8(x64GenContext, 0x66);
|
||||
x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
|
||||
x64Gen_writeU8(x64GenContext, 0x0F);
|
||||
x64Gen_writeU8(x64GenContext, 0x15);
|
||||
x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
|
||||
}
|
||||
|
||||
void x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc, uint8 imm8)
{
	// SSE2
	// shuffled copy of source to destination:
	// imm8 bit0 selects the low result double (0 -> dest[0], 1 -> dest[1]),
	// imm8 bit1 selects the high result double (0 -> src[0], 1 -> src[1])
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0xC6);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
	x64Gen_writeU8(x64GenContext, imm8);
}

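// usage sketch (illustrative, not from the original source): swap the two
// doubles of a register by shuffling it with itself:
//   x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, reg, reg, 1);
// imm8=1 -> result[0] = old dest[1], result[1] = old src[0] = old dest[0]
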
void x64Gen_addsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// add bottom double of two xmm registers, leave upper quadword unchanged
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false); // untested
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x58);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_addpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// add both doubles of two xmm registers
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x58);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_subsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// subtract bottom double of two xmm registers, leave upper quadword unchanged
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5C);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_subpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// subtract both doubles of two xmm registers
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false); // untested
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5C);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_mulsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// multiply bottom double of two xmm registers, leave upper quadword unchanged
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x59);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_mulpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// multiply both doubles of two xmm registers
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false); // untested
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x59);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_mulpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE2
	if (memRegister == REG_NONE)
	{
		assert_dbg();
	}
	else if (memRegister == REG_R14)
	{
		// MULPD <xmm>, [R14+<imm>] (66 REX 0F 59 /r with mod=10, rm=R14, disp32)
		x64Gen_writeU8(x64GenContext, 0x66);
		x64Gen_writeU8(x64GenContext, (xmmRegister < 8) ? 0x41 : 0x45);
		x64Gen_writeU8(x64GenContext, 0x0F);
		x64Gen_writeU8(x64GenContext, 0x59);
		x64Gen_writeU8(x64GenContext, 0x86 + (xmmRegister & 7) * 8);
		x64Gen_writeU32(x64GenContext, memImmU32);
	}
	else
	{
		assert_dbg();
	}
}

void x64Gen_divsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// divide bottom double of two xmm registers, leave upper quadword unchanged
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5E);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_divpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// divide bottom and top double of two xmm registers
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5E);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_comisd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// compare bottom doubles
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false); // untested
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2F);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 memoryReg, sint32 memImmS32)
{
	// SSE2
	// compare bottom double with double from memory location
	if( memoryReg == REG_R15 )
	{
		x64Gen_writeU8(x64GenContext, 0x66);
		x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true);
		x64Gen_writeU8(x64GenContext, 0x0F);
		x64Gen_writeU8(x64GenContext, 0x2F);
		x64Gen_writeU8(x64GenContext, 0x87+(xmmRegisterDest&7)*8);
		x64Gen_writeU32(x64GenContext, (uint32)memImmS32);
	}
	else
		assert_dbg();
}

void x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// compare bottom doubles (non-signalling variant: only SNaN operands raise the invalid exception)
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2E);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
}

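// note on (U)COMISD results: the compare writes ZF/PF/CF directly:
//   greater -> ZF=0,PF=0,CF=0   less -> CF=1   equal -> ZF=1   unordered -> ZF=PF=CF=1
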
void x64Gen_comiss_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 memoryReg, sint32 memImmS32)
{
	// SSE
	// compare bottom float with float from memory location
	if (memoryReg == REG_R15)
	{
		x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true);
		x64Gen_writeU8(x64GenContext, 0x0F);
		x64Gen_writeU8(x64GenContext, 0x2F);
		x64Gen_writeU8(x64GenContext, 0x87 + (xmmRegisterDest & 7) * 8);
		x64Gen_writeU32(x64GenContext, (uint32)memImmS32);
	}
	else
		assert_dbg();
}

void x64Gen_orps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32)
{
	// SSE
	// or xmm register with 128 bit value from memory
	if( memReg == REG_R15 )
	{
		x64Gen_genSSEVEXPrefix2(x64GenContext, memReg, xmmRegisterDest, false);
		x64Gen_writeU8(x64GenContext, 0x0F);
		x64Gen_writeU8(x64GenContext, 0x56);
		x64Gen_writeU8(x64GenContext, 0x87+(xmmRegisterDest&7)*8);
		x64Gen_writeU32(x64GenContext, (uint32)memImmS32);
	}
	else
		assert_dbg();
}

void x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32)
{
	// SSE
	// xor xmm register with 128 bit value from memory
	if( memReg == REG_R15 )
	{
		x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); // todo: should be x64Gen_genSSEVEXPrefix2() with memReg?
		x64Gen_writeU8(x64GenContext, 0x0F);
		x64Gen_writeU8(x64GenContext, 0x57);
		x64Gen_writeU8(x64GenContext, 0x87+(xmmRegisterDest&7)*8);
		x64Gen_writeU32(x64GenContext, (uint32)memImmS32);
	}
	else
		assert_dbg();
}

void x64Gen_andpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE2
	if (memRegister == REG_NONE)
	{
		assert_dbg();
	}
	else if (memRegister == REG_R14)
	{
		// ANDPD <xmm>, [R14+<imm>]
		x64Gen_writeU8(x64GenContext, 0x66);
		x64Gen_writeU8(x64GenContext, (xmmRegister < 8) ? 0x41 : 0x45);
		x64Gen_writeU8(x64GenContext, 0x0F);
		x64Gen_writeU8(x64GenContext, 0x54);
		x64Gen_writeU8(x64GenContext, 0x86 + (xmmRegister & 7) * 8);
		x64Gen_writeU32(x64GenContext, memImmU32);
	}
	else
	{
		assert_dbg();
	}
}

void x64Gen_andps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32)
{
	// SSE
	// and xmm register with 128 bit value from memory
	if( memReg == REG_R15 )
	{
		x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); // todo: should be x64Gen_genSSEVEXPrefix2() with memReg?
		x64Gen_writeU8(x64GenContext, 0x0F);
		x64Gen_writeU8(x64GenContext, 0x54);
		x64Gen_writeU8(x64GenContext, 0x87+(xmmRegisterDest&7)*8);
		x64Gen_writeU32(x64GenContext, (uint32)memImmS32);
	}
	else
		assert_dbg();
}

void x64Gen_andps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE
	// and xmm register with xmm register
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x54);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32)
{
	// SSE2
	// compare packed 32bit integers for equality
	if( memReg == REG_R15 )
	{
		x64Gen_writeU8(x64GenContext, 0x66);
		x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true);
		x64Gen_writeU8(x64GenContext, 0x0F);
		x64Gen_writeU8(x64GenContext, 0x76);
		x64Gen_writeU8(x64GenContext, 0x87+(xmmRegisterDest&7)*8);
		x64Gen_writeU32(x64GenContext, (uint32)memImmS32);
	}
	else
		assert_dbg();
}

void x64Gen_cvttpd2dq_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// convert two doubles into two 32-bit integers in bottom part of xmm register, zero the upper 64 bits of the destination register
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0xE6);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// convert double to truncated integer in general purpose register
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, registerDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2C);
	x64Gen_writeU8(x64GenContext, 0xC0+(registerDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// converts bottom 64bit double to bottom 32bit single
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5A);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// converts two 64bit doubles to two 32bit singles in bottom half of register
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5A);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// converts two 32bit singles to two 64bit doubles
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5A);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// converts bottom 32bit single to bottom 64bit double
	x64Gen_writeU8(x64GenContext, 0xF3);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x5A);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_cvtpi2pd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 memReg, sint32 memImmS32)
{
	// SSE2
	// converts two signed 32bit integers to two doubles
	if( memReg == REG_RSP )
	{
		x64Gen_writeU8(x64GenContext, 0x66);
		x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, false);
		x64Gen_writeU8(x64GenContext, 0x0F);
		x64Gen_writeU8(x64GenContext, 0x2A);
		x64Gen_writeU8(x64GenContext, 0x84+(xmmRegisterDest&7)*8);
		x64Gen_writeU8(x64GenContext, 0x24);
		x64Gen_writeU32(x64GenContext, (uint32)memImmS32);
	}
	else
	{
		assert_dbg();
	}
}

void x64Gen_cvtsd2si_reg64Low_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// converts bottom 64bit double to 32bit signed integer in general purpose register, rounding controlled by MXCSR (floating-point control register)
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, registerDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2D);
	x64Gen_writeU8(x64GenContext, 0xC0+(registerDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// converts bottom 64bit double to 32bit signed integer in general purpose register, always truncate
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, registerDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2C);
	x64Gen_writeU8(x64GenContext, 0xC0+(registerDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_sqrtsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// calculates square root of bottom double
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x51);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_sqrtpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// calculates square root of bottom and top double
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x51);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_rcpss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE
	// approximates reciprocal of bottom 32bit single
	x64Gen_writeU8(x64GenContext, 0xF3);
	x64Gen_genSSEVEXPrefix2(x64GenContext, xmmRegisterSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x53);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(xmmRegisterSrc&7));
}

void x64Gen_mulss_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE
	// multiply bottom 32bit single with single from memory location
	if( memRegister == REG_NONE )
	{
		assert_dbg();
	}
	else if( memRegister == REG_R15 )
	{
		// MULSS <xmm>, [R15+<imm>]
		x64Gen_writeU8(x64GenContext, 0xF3);
		x64Gen_writeU8(x64GenContext, (xmmRegister<8)?0x41:0x45);
		x64Gen_writeU8(x64GenContext, 0x0F);
		x64Gen_writeU8(x64GenContext, 0x59);
		x64Gen_writeU8(x64GenContext, 0x87+(xmmRegister&7)*8);
		x64Gen_writeU32(x64GenContext, memImmU32);
	}
	else
	{
		assert_dbg();
	}
}

void x64Gen_movd_xmmReg_reg64Low32(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc)
{
	// SSE2
	// copy low 32bit of general purpose register into xmm register
	// MOVD <xmm>, <reg32>
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, registerSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x6E); // alternative encoding: 0x7E, source and destination register are exchanged
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(registerSrc&7));
}

void x64Gen_movd_reg64Low32_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc)
{
	// SSE2
	// copy low 32bit of xmm register into general purpose register
	// MOVD <reg32>, <xmm>
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, registerDest, xmmRegisterSrc, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x7E); // alternative encoding: 0x6E, source and destination register are exchanged
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterSrc&7)*8+(registerDest&7));
}

void x64Gen_movq_xmmReg_reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc)
{
	// SSE2
	// copy general purpose register into xmm register
	// MOVQ <xmm>, <reg64>
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, registerSrc, xmmRegisterDest, true);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x6E); // alternative encoding: 0x7E, source and destination register are exchanged
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(registerSrc&7));
}

void x64Gen_movq_reg64_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 xmmRegisterSrc)
{
	// SSE2
	// copy xmm register into general purpose register
	// MOVQ <reg64>, <xmm>
	x64Gen_writeU8(x64GenContext, 0x66);
	x64Gen_genSSEVEXPrefix2(x64GenContext, registerDst, xmmRegisterSrc, true);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x7E);
	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterSrc&7)*8+(registerDst&7));
}

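// encoding recap for the four GPR<->XMM moves above (66 prefix; REX.W selects 32 vs 64 bit):
//   66 (REX) 0F 6E /r : MOVD/MOVQ xmm, r32/r64
//   66 (REX) 0F 7E /r : MOVD/MOVQ r32/r64, xmm
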
360
src/Cafe/HW/Espresso/Recompiler/x64Emit.hpp
Normal file
@ -0,0 +1,360 @@

template<uint8 op0, bool rex64Bit = false>
class x64_opc_1byte
{
public:
	static void emitBytes(x64GenContext_t* x64GenContext)
	{
		// write out op0
		x64Gen_writeU8(x64GenContext, op0);
	}

	static constexpr bool isRevOrder()
	{
		return false;
	}

	static constexpr bool hasRex64BitPrefix()
	{
		return rex64Bit;
	}
};

template<uint8 op0, bool rex64Bit = false>
class x64_opc_1byte_rev
{
public:
	static void emitBytes(x64GenContext_t* x64GenContext)
	{
		// write out op0
		x64Gen_writeU8(x64GenContext, op0);
	}

	static constexpr bool isRevOrder()
	{
		return true;
	}

	static constexpr bool hasRex64BitPrefix()
	{
		return rex64Bit;
	}
};

template<uint8 op0, uint8 op1, bool rex64Bit = false>
class x64_opc_2byte
{
public:
	static void emitBytes(x64GenContext_t* x64GenContext)
	{
		x64Gen_writeU8(x64GenContext, op0);
		x64Gen_writeU8(x64GenContext, op1);
	}

	static constexpr bool isRevOrder()
	{
		return false;
	}

	static constexpr bool hasRex64BitPrefix()
	{
		return rex64Bit;
	}
};

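// illustrative instantiations (assumptions for demonstration, not part of the
// original file): 0x89 /r is MOV r/m64,r64 and 0F AF /r is IMUL r64,r/m64,
// both of which need REX.W:
using x64_opc_mov_rm64_r64 = x64_opc_1byte<0x89, true>;
using x64_opc_imul_r64_rm64 = x64_opc_2byte<0x0F, 0xAF, true>;
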
enum class MODRM_OPR_TYPE
{
	REG,
	MEM
};

class x64MODRM_opr_reg64
{
public:
	x64MODRM_opr_reg64(uint8 reg)
	{
		this->reg = reg;
	}

	static constexpr MODRM_OPR_TYPE getType()
	{
		return MODRM_OPR_TYPE::REG;
	}

	const uint8 getReg() const
	{
		return reg;
	}

private:
	uint8 reg;
};

class x64MODRM_opr_memReg64
{
public:
	x64MODRM_opr_memReg64(uint8 reg)
	{
		this->reg = reg;
		this->offset = 0;
	}

	x64MODRM_opr_memReg64(uint8 reg, sint32 offset)
	{
		this->reg = reg;
		this->offset = offset;
	}

	static constexpr MODRM_OPR_TYPE getType()
	{
		return MODRM_OPR_TYPE::MEM;
	}

	const uint8 getBaseReg() const
	{
		return reg;
	}

	const uint32 getOffset() const
	{
		return (uint32)offset;
	}

	static constexpr bool hasBaseReg()
	{
		return true;
	}

	static constexpr bool hasIndexReg()
	{
		return false;
	}
private:
	uint8 reg;
	sint32 offset;
};

class x64MODRM_opr_memRegPlusReg
{
public:
	x64MODRM_opr_memRegPlusReg(uint8 regBase, uint8 regIndex)
	{
		if ((regIndex & 7) == 4)
		{
			// cant encode RSP/R12 in index register, switch with base register
			// this only works if the scaler is 1
			std::swap(regBase, regIndex);
			cemu_assert((regIndex & 7) != 4);
		}
		this->regBase = regBase;
		this->regIndex = regIndex;
		this->offset = 0;
	}

	x64MODRM_opr_memRegPlusReg(uint8 regBase, uint8 regIndex, sint32 offset)
	{
		if ((regIndex & 7) == 4)
		{
			// cant encode RSP/R12 in index register, switch with base register (scaler must be 1)
			std::swap(regBase, regIndex);
			cemu_assert((regIndex & 7) != 4);
		}
		this->regBase = regBase;
		this->regIndex = regIndex;
		this->offset = offset;
	}

	static constexpr MODRM_OPR_TYPE getType()
	{
		return MODRM_OPR_TYPE::MEM;
	}

	const uint8 getBaseReg() const
	{
		return regBase;
	}

	const uint8 getIndexReg()
	{
		return regIndex;
	}

	const uint32 getOffset() const
	{
		return (uint32)offset;
	}

	static constexpr bool hasBaseReg()
	{
		return true;
	}

	static constexpr bool hasIndexReg()
	{
		return true;
	}
private:
	uint8 regBase;
	uint8 regIndex; // multiplied by scaler which is fixed to 1
	sint32 offset;
};

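// construction sketch (hypothetical register numbers, following the standard
// x64 encoding where RCX=1 and R13=13): an operand for [R13 + RCX*1 + 0x10]:
//   x64MODRM_opr_memRegPlusReg opMem(13, 1, 0x10);
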
template<class opcodeBytes, typename TA, typename TB>
void _x64Gen_writeMODRM_internal(x64GenContext_t* x64GenContext, TA opA, TB opB)
{
	static_assert(TA::getType() == MODRM_OPR_TYPE::REG);
	x64Gen_checkBuffer(x64GenContext);
	// REX prefix
	// 0100 WRXB
	if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::REG)
	{
		if (opA.getReg() & 8 || opB.getReg() & 8 || opcodeBytes::hasRex64BitPrefix())
		{
			// opA (reg field) -> REX.R
			// opB (r/m field) -> REX.B
			x64Gen_writeU8(x64GenContext, 0x40 | ((opA.getReg() & 8) ? (1 << 2) : 0) | ((opB.getReg() & 8) ? (1 << 0) : 0) | (opcodeBytes::hasRex64BitPrefix() ? (1 << 3) : 0));
		}
	}
	else if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::MEM)
	{
		if constexpr (opB.hasBaseReg() && opB.hasIndexReg())
		{
			if (opA.getReg() & 8 || opB.getBaseReg() & 8 || opB.getIndexReg() & 8 || opcodeBytes::hasRex64BitPrefix())
			{
				// opA (reg field) -> REX.R
				// baseReg -> REX.B
				// indexReg -> REX.X
				x64Gen_writeU8(x64GenContext, 0x40 | ((opA.getReg() & 8) ? (1 << 2) : 0) | ((opB.getBaseReg() & 8) ? (1 << 0) : 0) | ((opB.getIndexReg() & 8) ? (1 << 1) : 0) | (opcodeBytes::hasRex64BitPrefix() ? (1 << 3) : 0));
			}
		}
		else if constexpr (opB.hasBaseReg())
		{
			if (opA.getReg() & 8 || opB.getBaseReg() & 8 || opcodeBytes::hasRex64BitPrefix())
			{
				// opA (reg field) -> REX.R
				// baseReg -> REX.B
				x64Gen_writeU8(x64GenContext, 0x40 | ((opA.getReg() & 8) ? (1 << 2) : 0) | ((opB.getBaseReg() & 8) ? (1 << 0) : 0) | (opcodeBytes::hasRex64BitPrefix() ? (1 << 3) : 0));
			}
		}
		else
		{
			if (opA.getReg() & 8 || opcodeBytes::hasRex64BitPrefix())
			{
				// todo - verify
				// opA (reg field) -> REX.R
				x64Gen_writeU8(x64GenContext, 0x40 | ((opA.getReg() & 8) ? (1 << 2) : 0) | (opcodeBytes::hasRex64BitPrefix() ? (1 << 3) : 0));
			}
		}
	}
	// opcode
	opcodeBytes::emitBytes(x64GenContext);
	// modrm byte
	if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::REG)
	{
		// reg, reg
		x64Gen_writeU8(x64GenContext, 0xC0 + (opB.getReg() & 7) + ((opA.getReg() & 7) << 3));
	}
	else if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::MEM)
	{
		if constexpr (TB::hasBaseReg() == false) // todo - also check for index reg and secondary sib reg
		{
			// form: [offset]
			// instruction is just offset
			cemu_assert(false);
		}
		else if constexpr (TB::hasIndexReg())
		{
			// form: [base+index*scaler+offset], scaler is currently fixed to 1
			cemu_assert((opB.getIndexReg() & 7) != 4); // RSP not allowed as index register
			const uint32 offset = opB.getOffset();
			if (offset == 0 && (opB.getBaseReg() & 7) != 5) // RBP/R13 has special meaning in no-offset encoding
			{
				// form: [index*1+base]
				x64Gen_writeU8(x64GenContext, 0x00 + (4) + ((opA.getReg() & 7) << 3));
				// SIB byte
				x64Gen_writeU8(x64GenContext, ((opB.getIndexReg()&7) << 3) + (opB.getBaseReg() & 7));
			}
			else if (offset == (uint32)(sint32)(sint8)offset)
			{
				// form: [index*1+base+sbyte]
				x64Gen_writeU8(x64GenContext, 0x40 + (4) + ((opA.getReg() & 7) << 3));
				// SIB byte
				x64Gen_writeU8(x64GenContext, ((opB.getIndexReg() & 7) << 3) + (opB.getBaseReg() & 7));
				x64Gen_writeU8(x64GenContext, (uint8)offset);
			}
			else
			{
				// form: [index*1+base+sdword]
				x64Gen_writeU8(x64GenContext, 0x80 + (4) + ((opA.getReg() & 7) << 3));
				// SIB byte
				x64Gen_writeU8(x64GenContext, ((opB.getIndexReg() & 7) << 3) + (opB.getBaseReg() & 7));
				x64Gen_writeU32(x64GenContext, (uint32)offset);
			}
		}
		else
		{
			// form: [baseReg + offset]
			const uint32 offset = opB.getOffset();
			if (offset == 0 && (opB.getBaseReg() & 7) != 5) // RBP/R13 has special meaning in no-offset encoding
			{
				// form: [baseReg]
				// if base reg is RSP/R12 we need to use SIB form of instruction
				if ((opB.getBaseReg() & 7) == 4)
				{
					x64Gen_writeU8(x64GenContext, 0x00 + (4) + ((opA.getReg() & 7) << 3));
					// SIB byte [form: none*1+base]
					x64Gen_writeU8(x64GenContext, (4 << 3) + (opB.getBaseReg() & 7));
				}
				else
				{
					x64Gen_writeU8(x64GenContext, 0x00 + (opB.getBaseReg() & 7) + ((opA.getReg() & 7) << 3));
				}
			}
			else if (offset == (uint32)(sint32)(sint8)offset)
			{
				// form: [baseReg+sbyte]
				// if base reg is RSP/R12 we need to use SIB form of instruction
				if ((opB.getBaseReg() & 7) == 4)
				{
					x64Gen_writeU8(x64GenContext, 0x40 + (4) + ((opA.getReg() & 7) << 3));
					// SIB byte [form: none*1+base]
					x64Gen_writeU8(x64GenContext, (4 << 3) + (opB.getBaseReg() & 7));
				}
				else
				{
					x64Gen_writeU8(x64GenContext, 0x40 + (opB.getBaseReg() & 7) + ((opA.getReg() & 7) << 3));
				}
				x64Gen_writeU8(x64GenContext, (uint8)offset);
			}
			else
			{
				// form: [baseReg+sdword]
				// if base reg is RSP/R12 we need to use SIB form of instruction
				if ((opB.getBaseReg() & 7) == 4)
				{
					x64Gen_writeU8(x64GenContext, 0x80 + (4) + ((opA.getReg() & 7) << 3));
					// SIB byte [form: none*1+base]
					x64Gen_writeU8(x64GenContext, (4 << 3) + (opB.getBaseReg() & 7));
				}
				else
				{
					x64Gen_writeU8(x64GenContext, 0x80 + (opB.getBaseReg() & 7) + ((opA.getReg() & 7) << 3));
				}
				x64Gen_writeU32(x64GenContext, (uint32)offset);
			}
		}
	}
	else
	{
		assert_dbg();
	}
}

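// ModRM layout recap: mod(2) | reg(3) | rm(3). mod=00 means no displacement
// (except rm=101, which is RIP/disp32), mod=01 adds a disp8, mod=10 adds a
// disp32 and mod=11 is register-direct; rm=100 selects an additional SIB byte.
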
template<class opcodeBytes, typename TA, typename TB>
void x64Gen_writeMODRM_dyn(x64GenContext_t* x64GenContext, TA opLeft, TB opRight)
{
	if constexpr (opcodeBytes::isRevOrder())
		_x64Gen_writeMODRM_internal<opcodeBytes, TB, TA>(x64GenContext, opRight, opLeft);
	else
		_x64Gen_writeMODRM_internal<opcodeBytes, TA, TB>(x64GenContext, opLeft, opRight);
}

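// usage sketch (not part of the original file; register numbers follow the
// standard x64 encoding, RAX=0 and RSP=4): emit MOV [RSP+offset], RAX.
// MOV r/m64, r64 is 0x89 /r with REX.W; the register operand fills the
// ModRM reg field, so it is passed as the left (REG) operand.
static inline void x64Example_movToStack(x64GenContext_t* x64GenContext, sint32 offset)
{
	x64Gen_writeMODRM_dyn<x64_opc_1byte<0x89, true>>(x64GenContext,
		x64MODRM_opr_reg64(0), x64MODRM_opr_memReg64(4, offset));
	// offset == 0 yields 48 89 04 24 (mod=00 + SIB, since RSP as base requires SIB)
}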