diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 6ae4b892..c7cbc202 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -202,7 +202,7 @@ jobs:
- name: "Install molten-vk"
run: |
- curl -L -O https://github.com/KhronosGroup/MoltenVK/releases/download/v1.2.9/MoltenVK-macos.tar
+ curl -L -O https://github.com/KhronosGroup/MoltenVK/releases/download/v1.3.0/MoltenVK-macos.tar
tar xf MoltenVK-macos.tar
sudo mkdir -p /usr/local/lib
sudo cp MoltenVK/MoltenVK/dynamic/dylib/macOS/libMoltenVK.dylib /usr/local/lib
diff --git a/.gitmodules b/.gitmodules
index daf39a78..82e53209 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -22,3 +22,6 @@
path = dependencies/metal-cpp
url = https://github.com/bkaradzic/metal-cpp.git
shallow = true
+[submodule "dependencies/xbyak_aarch64"]
+ path = dependencies/xbyak_aarch64
+ url = https://github.com/fujitsu/xbyak_aarch64
diff --git a/BUILD.md b/BUILD.md
index 662be96d..31c26531 100644
--- a/BUILD.md
+++ b/BUILD.md
@@ -192,3 +192,41 @@ Then install the dependencies:
If CMake complains about Cemu already being compiled or another similar error, try deleting the `CMakeCache.txt` file inside the `build` folder and retry building.
+## CMake configure flags
+Some flags can be passed during CMake configure to customise which features are enabled on build.
+
+Example usage: `cmake -S . -B build -DCMAKE_BUILD_TYPE=release -DENABLE_SDL=ON -DENABLE_VULKAN=OFF`
+
+### All platforms
+| Flag | | Description | Default | Note |
+|--------------------|:--|-----------------------------------------------------------------------------|---------|--------------------|
+| ALLOW_PORTABLE | | Allow Cemu to use the `portable` directory to store configs and data | ON | |
+| CEMU_CXX_FLAGS | | Flags passed straight to the compiler, e.g. `-march=native`, `-Wall`, `/W3` | "" | |
+| ENABLE_CUBEB | | Enable cubeb audio backend | ON | |
+| ENABLE_DISCORD_RPC | | Enable Discord Rich Presence support | ON | |
+| ENABLE_OPENGL | | Enable OpenGL graphics backend | ON | Currently required |
+| ENABLE_HIDAPI | | Enable HIDAPI (used for Wiimote controller API) | ON | |
+| ENABLE_SDL | | Enable SDLController controller API | ON | Currently required |
+| ENABLE_VCPKG | | Use VCPKG package manager to obtain dependencies | ON | |
+| ENABLE_VULKAN | | Enable the Vulkan graphics backend | ON | |
+| ENABLE_WXWIDGETS | | Enable wxWidgets UI | ON | Currently required |
+
+### Windows
+| Flag | Description | Default | Note |
+|--------------------|-----------------------------------|---------|--------------------|
+| ENABLE_DIRECTAUDIO | Enable DirectAudio audio backend | ON | Currently required |
+| ENABLE_DIRECTINPUT | Enable DirectInput controller API | ON | Currently required |
+| ENABLE_XAUDIO | Enable XAudio audio backend | ON | |
+| ENABLE_XINPUT | Enable XInput controller API | ON | |
+
+### Linux
+| Flag | Description | Default |
+|-----------------------|----------------------------------------------------|---------|
+| ENABLE_BLUEZ | Build with Bluez (used for Wiimote controller API) | ON |
+| ENABLE_FERAL_GAMEMODE | Enable Feral Interactive GameMode support | ON |
+| ENABLE_WAYLAND | Enable Wayland support | ON |
+
+### macOS
+| Flag | Description | Default |
+|--------------|------------------------------------------------|---------|
+| MACOS_BUNDLE | Build the macOS executable as an application bundle | OFF |
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 93198e1b..efbdc1ab 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -238,6 +238,10 @@ endif()
add_subdirectory("dependencies/ih264d" EXCLUDE_FROM_ALL)
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64)|(AARCH64)") # xbyak_aarch64 is only added when the target processor name contains aarch64/AARCH64. NOTE(review): some toolchains report "arm64"/"ARM64" instead - confirm whether those should match too
+ add_subdirectory("dependencies/xbyak_aarch64" EXCLUDE_FROM_ALL)
+endif()
+
find_package(ZArchive)
if (NOT ZArchive_FOUND)
add_subdirectory("dependencies/ZArchive" EXCLUDE_FROM_ALL)
diff --git a/boost.natvis b/boost.natvis
new file mode 100644
index 00000000..2781a585
--- /dev/null
+++ b/boost.natvis
@@ -0,0 +1,26 @@
+
+
+
+
+
+ - m_holder.m_size
+
+ m_holder.m_size
+ m_holder.m_start
+
+
+
+
+
+ {{ size={m_holder.m_size} }}
+
+ - m_holder.m_size
+ - static_capacity
+
+ m_holder.m_size
+ ($T1*)m_holder.storage.data
+
+
+
+
+
diff --git a/dependencies/vcpkg b/dependencies/vcpkg
index a4275b7e..533a5fda 160000
--- a/dependencies/vcpkg
+++ b/dependencies/vcpkg
@@ -1 +1 @@
-Subproject commit a4275b7eee79fb24ec2e135481ef5fce8b41c339
+Subproject commit 533a5fda5c0646d1771345fb572e759283444d5f
diff --git a/dependencies/xbyak_aarch64 b/dependencies/xbyak_aarch64
new file mode 160000
index 00000000..904b8923
--- /dev/null
+++ b/dependencies/xbyak_aarch64
@@ -0,0 +1 @@
+Subproject commit 904b8923457f3ec0d6f82ea2d6832a792851194d
diff --git a/dist/linux/info.cemu.Cemu.desktop b/dist/linux/info.cemu.Cemu.desktop
index 5003d4a6..6eeb0120 100644
--- a/dist/linux/info.cemu.Cemu.desktop
+++ b/dist/linux/info.cemu.Cemu.desktop
@@ -24,3 +24,4 @@ Comment[it]=Software per emulare giochi e applicazioni per Wii U su PC
Categories=Game;Emulator;
Keywords=Nintendo;
MimeType=application/x-wii-u-rom;
+StartupWMClass=Cemu
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 79471321..ee7f8610 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -49,7 +49,6 @@ add_subdirectory(audio)
add_subdirectory(util)
add_subdirectory(imgui)
add_subdirectory(resource)
-add_subdirectory(asm)
add_executable(CemuBin
main.cpp
diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt
index 881a6d6d..85df32c4 100644
--- a/src/Cafe/CMakeLists.txt
+++ b/src/Cafe/CMakeLists.txt
@@ -67,24 +67,31 @@ add_library(CemuCafe
HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h
HW/Espresso/Recompiler/PPCRecompiler.cpp
HW/Espresso/Recompiler/PPCRecompiler.h
- HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp
+ HW/Espresso/Recompiler/IML/IML.h
+ HW/Espresso/Recompiler/IML/IMLSegment.cpp
+ HW/Espresso/Recompiler/IML/IMLSegment.h
+ HW/Espresso/Recompiler/IML/IMLInstruction.cpp
+ HW/Espresso/Recompiler/IML/IMLInstruction.h
+ HW/Espresso/Recompiler/IML/IMLDebug.cpp
+ HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp
+ HW/Espresso/Recompiler/IML/IMLOptimizer.cpp
+ HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp
+ HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h
+ HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp
+ HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h
HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp
HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp
HW/Espresso/Recompiler/PPCRecompilerIml.h
- HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp
- HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp
- HW/Espresso/Recompiler/PPCRecompilerImlRanges.h
- HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator2.cpp
- HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp
HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp
- HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp
- HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp
- HW/Espresso/Recompiler/PPCRecompilerX64.cpp
- HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp
- HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp
- HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp
- HW/Espresso/Recompiler/PPCRecompilerX64.h
- HW/Espresso/Recompiler/x64Emit.hpp
+ HW/Espresso/Recompiler/BackendX64/BackendX64AVX.cpp
+ HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp
+ HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
+ HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp
+ HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp
+ HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp
+ HW/Espresso/Recompiler/BackendX64/BackendX64.h
+ HW/Espresso/Recompiler/BackendX64/X64Emit.hpp
+ HW/Espresso/Recompiler/BackendX64/x86Emitter.h
HW/Latte/Common/RegisterSerializer.cpp
HW/Latte/Common/RegisterSerializer.h
HW/Latte/Common/ShaderSerializer.cpp
@@ -469,6 +476,10 @@ add_library(CemuCafe
OS/libs/nsyshid/Infinity.h
OS/libs/nsyshid/Skylander.cpp
OS/libs/nsyshid/Skylander.h
+ OS/libs/nsyshid/SkylanderXbox360.cpp
+ OS/libs/nsyshid/SkylanderXbox360.h
+ OS/libs/nsyshid/g721/g721.cpp
+ OS/libs/nsyshid/g721/g721.h
OS/libs/nsyskbd/nsyskbd.cpp
OS/libs/nsyskbd/nsyskbd.h
OS/libs/nsysnet/nsysnet.cpp
@@ -589,6 +600,14 @@ if(ENABLE_METAL)
#)
endif()
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64)|(AARCH64)")
+ target_sources(CemuCafe PRIVATE
+ HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp
+ HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h
+ )
+ target_link_libraries(CemuCafe PRIVATE xbyak_aarch64)
+endif()
+
set_property(TARGET CemuCafe PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>")
target_include_directories(CemuCafe PUBLIC "../")
@@ -600,7 +619,6 @@ else()
endif()
target_link_libraries(CemuCafe PRIVATE
- CemuAsm
CemuAudio
CemuCommon
CemuComponents
diff --git a/src/Cafe/CafeSystem.cpp b/src/Cafe/CafeSystem.cpp
index 66b0969f..f4d05937 100644
--- a/src/Cafe/CafeSystem.cpp
+++ b/src/Cafe/CafeSystem.cpp
@@ -854,7 +854,7 @@ namespace CafeSystem
module->TitleStart();
cemu_initForGame();
// enter scheduler
- if (ActiveSettings::GetCPUMode() == CPUMode::MulticoreRecompiler && !LaunchSettings::ForceInterpreter())
+ if ((ActiveSettings::GetCPUMode() == CPUMode::MulticoreRecompiler || LaunchSettings::ForceMultiCoreInterpreter()) && !LaunchSettings::ForceInterpreter())
coreinit::OSSchedulerBegin(3);
else
coreinit::OSSchedulerBegin(1);
diff --git a/src/Cafe/HW/Espresso/Debugger/Debugger.cpp b/src/Cafe/HW/Espresso/Debugger/Debugger.cpp
index 37e374d6..e84c9fda 100644
--- a/src/Cafe/HW/Espresso/Debugger/Debugger.cpp
+++ b/src/Cafe/HW/Espresso/Debugger/Debugger.cpp
@@ -8,6 +8,7 @@
#include "gui/debugger/DebuggerWindow2.h"
#include "Cafe/OS/libs/coreinit/coreinit.h"
+#include "util/helpers/helpers.h"
#if BOOST_OS_WINDOWS
#include
@@ -136,11 +137,6 @@ void debugger_createCodeBreakpoint(uint32 address, uint8 bpType)
debugger_updateExecutionBreakpoint(address);
}
-void debugger_createExecuteBreakpoint(uint32 address)
-{
- debugger_createCodeBreakpoint(address, DEBUGGER_BP_T_NORMAL);
-}
-
namespace coreinit
{
std::vector& OSGetSchedulerThreads();
@@ -294,8 +290,23 @@ void debugger_toggleExecuteBreakpoint(uint32 address)
}
else
{
- // create new breakpoint
- debugger_createExecuteBreakpoint(address);
+ // create new execution breakpoint
+ debugger_createCodeBreakpoint(address, DEBUGGER_BP_T_NORMAL);
+ }
+}
+
+void debugger_toggleLoggingBreakpoint(uint32 address) // adds a logging breakpoint at address, or removes the one already there
+{
+	auto loggingBP = debugger_getFirstBP(address, DEBUGGER_BP_T_LOGGING);
+	if (!loggingBP)
+	{
+		// no logging breakpoint registered at this address yet, so add one
+		debugger_createCodeBreakpoint(address, DEBUGGER_BP_T_LOGGING);
+	}
+	else
+	{
+		// a logging breakpoint is already present here, so remove it
+		debugger_deleteBreakpoint(loggingBP);
}
}
@@ -538,7 +549,48 @@ void debugger_enterTW(PPCInterpreter_t* hCPU)
{
if (bp->bpType == DEBUGGER_BP_T_LOGGING && bp->enabled)
{
- std::string logName = !bp->comment.empty() ? "Breakpoint '"+boost::nowide::narrow(bp->comment)+"'" : fmt::format("Breakpoint at 0x{:08X} (no comment)", bp->address);
+ std::string comment = !bp->comment.empty() ? boost::nowide::narrow(bp->comment) : fmt::format("Breakpoint at 0x{:08X} (no comment)", bp->address);
+
+ auto replacePlaceholders = [&](const std::string& prefix, const auto& formatFunc) // rewrites `comment` in place: each "<prefix>N}" with 0 <= N < 32 becomes formatFunc(N)
+ {
+ size_t pos = 0;
+ while ((pos = comment.find(prefix, pos)) != std::string::npos)
+ {
+ size_t endPos = comment.find('}', pos);
+ if (endPos == std::string::npos)
+ break; // unterminated placeholder: stop scanning, the rest of the text stays as written
+
+ try
+ {
+ if (int regNum = ConvertString(comment.substr(pos + prefix.length(), endPos - pos - prefix.length())); regNum >= 0 && regNum < 32) // text between the prefix and '}' is parsed as the register index
+ {
+ std::string replacement = formatFunc(regNum);
+ comment.replace(pos, endPos - pos + 1, replacement);
+ pos += replacement.length(); // resume after the inserted text so it is not scanned again
+ }
+ else
+ {
+ pos = endPos + 1; // index outside 0..31: leave the text untouched and continue after '}'
+ }
+ }
+ catch (...)
+ {
+ pos = endPos + 1; // anything thrown while parsing/formatting: leave the text untouched and continue after '}'
+ }
+ }
+ };
+
+ // Replace integer register placeholders {rX}
+ replacePlaceholders("{r", [&](int regNum) {
+ return fmt::format("0x{:08X}", hCPU->gpr[regNum]);
+ });
+
+ // Replace floating point register placeholders {fX}
+ replacePlaceholders("{f", [&](int regNum) {
+ return fmt::format("{}", hCPU->fpr[regNum].fpr);
+ });
+
+ std::string logName = "Breakpoint '" + comment + "'";
std::string logContext = fmt::format("Thread: {:08x} LR: 0x{:08x}", MEMPTR(coreinit::OSGetCurrentThread()).GetMPTR(), hCPU->spr.LR, cemuLog_advancedPPCLoggingEnabled() ? " Stack Trace:" : "");
cemuLog_log(LogType::Force, "[Debugger] {} was executed! {}", logName, logContext);
if (cemuLog_advancedPPCLoggingEnabled())
diff --git a/src/Cafe/HW/Espresso/Debugger/Debugger.h b/src/Cafe/HW/Espresso/Debugger/Debugger.h
index 249c47b8..c220eb8a 100644
--- a/src/Cafe/HW/Espresso/Debugger/Debugger.h
+++ b/src/Cafe/HW/Espresso/Debugger/Debugger.h
@@ -100,8 +100,8 @@ extern debuggerState_t debuggerState;
// new API
DebuggerBreakpoint* debugger_getFirstBP(uint32 address);
void debugger_createCodeBreakpoint(uint32 address, uint8 bpType);
-void debugger_createExecuteBreakpoint(uint32 address);
void debugger_toggleExecuteBreakpoint(uint32 address); // create/remove execute breakpoint
+void debugger_toggleLoggingBreakpoint(uint32 address); // create/remove logging breakpoint
void debugger_toggleBreakpoint(uint32 address, bool state, DebuggerBreakpoint* bp);
void debugger_createMemoryBreakpoint(uint32 address, bool onRead, bool onWrite);
diff --git a/src/Cafe/HW/Espresso/EspressoISA.h b/src/Cafe/HW/Espresso/EspressoISA.h
index b3ae45c3..5e09763b 100644
--- a/src/Cafe/HW/Espresso/EspressoISA.h
+++ b/src/Cafe/HW/Espresso/EspressoISA.h
@@ -10,6 +10,18 @@ namespace Espresso
CR_BIT_INDEX_SO = 3,
};
+ enum class PSQ_LOAD_TYPE // numeric data type codes 0-7. NOTE(review): presumably the type field used by paired-single quantized loads - confirm against the users of this enum
+ {
+ TYPE_F32 = 0,
+ TYPE_UNUSED1 = 1, // codes 1-3 carry no data type of their own here
+ TYPE_UNUSED2 = 2,
+ TYPE_UNUSED3 = 3,
+ TYPE_U8 = 4,
+ TYPE_U16 = 5,
+ TYPE_S8 = 6,
+ TYPE_S16 = 7,
+ };
+
enum class PrimaryOpcode
{
// underscore at the end of the name means that this instruction always updates CR0 (as if RC bit is set)
@@ -91,13 +103,15 @@ namespace Espresso
BCCTR = 528
};
- enum class OPCODE_31
+ enum class Opcode31
{
-
+ TW = 4,
+ MFTB = 371,
};
inline PrimaryOpcode GetPrimaryOpcode(uint32 opcode) { return (PrimaryOpcode)(opcode >> 26); };
inline Opcode19 GetGroup19Opcode(uint32 opcode) { return (Opcode19)((opcode >> 1) & 0x3FF); };
+ inline Opcode31 GetGroup31Opcode(uint32 opcode) { return (Opcode31)((opcode >> 1) & 0x3FF); };
struct BOField
{
@@ -132,6 +146,12 @@ namespace Espresso
uint8 bo;
};
+	// extracts the LK flag (least significant opcode bit); only meaningful for branch instructions
+	inline bool DecodeLK(uint32 opcode)
+	{
+		return static_cast<bool>(opcode & 0x1u);
+	}
+
inline void _decodeForm_I(uint32 opcode, uint32& LI, bool& AA, bool& LK)
{
LI = opcode & 0x3fffffc;
@@ -183,13 +203,7 @@ namespace Espresso
_decodeForm_D_branch(opcode, BD, BO, BI, AA, LK);
}
- inline void decodeOp_BCLR(uint32 opcode, BOField& BO, uint32& BI, bool& LK)
- {
- // form XL (with BD field expected to be zero)
- _decodeForm_XL(opcode, BO, BI, LK);
- }
-
- inline void decodeOp_BCCTR(uint32 opcode, BOField& BO, uint32& BI, bool& LK)
+ inline void decodeOp_BCSPR(uint32 opcode, BOField& BO, uint32& BI, bool& LK) // shared decoder for BCLR and BCCTR
{
// form XL (with BD field expected to be zero)
_decodeForm_XL(opcode, BO, BI, LK);
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp
index fe9316f0..769344f8 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp
@@ -3,12 +3,12 @@ static void PPCInterpreter_setXerOV(PPCInterpreter_t* hCPU, bool hasOverflow)
{
if (hasOverflow)
{
- hCPU->spr.XER |= XER_SO;
- hCPU->spr.XER |= XER_OV;
+ hCPU->xer_so = 1;
+ hCPU->xer_ov = 1;
}
else
{
- hCPU->spr.XER &= ~XER_OV;
+ hCPU->xer_ov = 0;
}
}
@@ -246,7 +246,7 @@ static void PPCInterpreter_SUBFCO(PPCInterpreter_t* hCPU, uint32 opcode)
uint32 a = hCPU->gpr[rA];
uint32 b = hCPU->gpr[rB];
hCPU->gpr[rD] = ~a + b + 1;
- // update xer
+ // update carry
if (ppc_carry_3(~a, b, 1))
hCPU->xer_ca = 1;
else
@@ -848,8 +848,7 @@ static void PPCInterpreter_CMP(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
else
hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
- if ((hCPU->spr.XER & XER_SO) != 0)
- hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
+ hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
PPCInterpreter_nextInstruction(hCPU);
}
@@ -871,8 +870,7 @@ static void PPCInterpreter_CMPL(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
else
hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
- if ((hCPU->spr.XER & XER_SO) != 0)
- hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
+ hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
PPCInterpreter_nextInstruction(hCPU);
}
@@ -895,8 +893,7 @@ static void PPCInterpreter_CMPI(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
else
hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
- if (hCPU->spr.XER & XER_SO)
- hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
+ hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
PPCInterpreter_nextInstruction(hCPU);
}
@@ -919,8 +916,7 @@ static void PPCInterpreter_CMPLI(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
else
hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
- if (hCPU->spr.XER & XER_SO)
- hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
+ hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
PPCInterpreter_nextInstruction(hCPU);
}
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp
index aed571d7..2c99b84c 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp
@@ -32,7 +32,7 @@ espresso_frsqrte_entry_t frsqrteLookupTable[32] =
{0x20c1000, 0x35e},{0x1f12000, 0x332},{0x1d79000, 0x30a},{0x1bf4000, 0x2e6},
};
-double frsqrte_espresso(double input)
+ATTR_MS_ABI double frsqrte_espresso(double input)
{
unsigned long long x = *(unsigned long long*)&input;
@@ -111,7 +111,7 @@ espresso_fres_entry_t fresLookupTable[32] =
{0x88400, 0x11a}, {0x65000, 0x11a}, {0x41c00, 0x108}, {0x20c00, 0x106}
};
-double fres_espresso(double input)
+ATTR_MS_ABI double fres_espresso(double input)
{
// based on testing we know that fres uses only the first 15 bits of the mantissa
// seee eeee eeee mmmm mmmm mmmm mmmx xxxx .... (s = sign, e = exponent, m = mantissa, x = not used)
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h
index bc8458d9..896fd21c 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h
@@ -50,9 +50,9 @@
#define CR_BIT_EQ 2
#define CR_BIT_SO 3
-#define XER_SO (1<<31) // summary overflow bit
-#define XER_OV (1<<30) // overflow bit
#define XER_BIT_CA (29) // carry bit index. To accelerate frequent access, this bit is stored as a separate uint8
+#define XER_BIT_SO (31) // summary overflow, counterpart to CR SO
+#define XER_BIT_OV (30)
// FPSCR
#define FPSCR_VXSNAN (1<<24)
@@ -118,7 +118,8 @@
static inline void ppc_update_cr0(PPCInterpreter_t* hCPU, uint32 r)
{
- hCPU->cr[CR_BIT_SO] = (hCPU->spr.XER&XER_SO) ? 1 : 0;
+ cemu_assert_debug(hCPU->xer_so <= 1);
+ hCPU->cr[CR_BIT_SO] = hCPU->xer_so;
hCPU->cr[CR_BIT_LT] = ((r != 0) ? 1 : 0) & ((r & 0x80000000) ? 1 : 0);
hCPU->cr[CR_BIT_EQ] = (r == 0);
hCPU->cr[CR_BIT_GT] = hCPU->cr[CR_BIT_EQ] ^ hCPU->cr[CR_BIT_LT] ^ 1; // this works because EQ and LT can never be set at the same time. So the only case where GT becomes 1 is when LT=0 and EQ=0
@@ -190,8 +191,8 @@ inline double roundTo25BitAccuracy(double d)
return *(double*)&v;
}
-double fres_espresso(double input);
-double frsqrte_espresso(double input);
+ATTR_MS_ABI double fres_espresso(double input);
+ATTR_MS_ABI double frsqrte_espresso(double input);
void fcmpu_espresso(PPCInterpreter_t* hCPU, int crfD, double a, double b);
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp
index 694e05e6..26467458 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp
@@ -85,7 +85,8 @@ static void PPCInterpreter_STWCX(PPCInterpreter_t* hCPU, uint32 Opcode)
ppc_setCRBit(hCPU, CR_BIT_GT, 0);
ppc_setCRBit(hCPU, CR_BIT_EQ, 1);
}
- ppc_setCRBit(hCPU, CR_BIT_SO, (hCPU->spr.XER&XER_SO) != 0 ? 1 : 0);
+ cemu_assert_debug(hCPU->xer_so <= 1);
+ ppc_setCRBit(hCPU, CR_BIT_SO, hCPU->xer_so);
// remove reservation
hCPU->reservedMemAddr = 0;
hCPU->reservedMemValue = 0;
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp
index ace1601f..08d6765a 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp
@@ -63,16 +63,24 @@ void PPCInterpreter_setDEC(PPCInterpreter_t* hCPU, uint32 newValue)
uint32 PPCInterpreter_getXER(PPCInterpreter_t* hCPU)
{
uint32 xerValue = hCPU->spr.XER;
- xerValue &= ~(1<xer_ca )
- xerValue |= (1<xer_ca)
+ xerValue |= (1 << XER_BIT_CA);
+ if (hCPU->xer_so)
+ xerValue |= (1 << XER_BIT_SO);
+ if (hCPU->xer_ov)
+ xerValue |= (1 << XER_BIT_OV);
return xerValue;
}
void PPCInterpreter_setXER(PPCInterpreter_t* hCPU, uint32 v)
{
hCPU->spr.XER = v;
- hCPU->xer_ca = (v>>XER_BIT_CA)&1;
+ hCPU->xer_ca = (v >> XER_BIT_CA) & 1;
+ hCPU->xer_so = (v >> XER_BIT_SO) & 1;
+ hCPU->xer_ov = (v >> XER_BIT_OV) & 1;
}
uint32 PPCInterpreter_getCoreIndex(PPCInterpreter_t* hCPU)
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp
index 12f86427..d6b643ee 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp
@@ -5,7 +5,6 @@
#include "Cafe/OS/libs/coreinit/coreinit_CodeGen.h"
#include "../Recompiler/PPCRecompiler.h"
-#include "../Recompiler/PPCRecompilerX64.h"
#include
#include "Cafe/HW/Latte/Core/LatteBufferCache.h"
diff --git a/src/Cafe/HW/Espresso/PPCState.h b/src/Cafe/HW/Espresso/PPCState.h
index c315ed0e..179e2687 100644
--- a/src/Cafe/HW/Espresso/PPCState.h
+++ b/src/Cafe/HW/Espresso/PPCState.h
@@ -49,12 +49,12 @@ struct PPCInterpreter_t
uint32 fpscr;
uint8 cr[32]; // 0 -> bit not set, 1 -> bit set (upper 7 bits of each byte must always be zero) (cr0 starts at index 0, cr1 at index 4 ..)
uint8 xer_ca; // carry from xer
- uint8 LSQE;
- uint8 PSE;
+ uint8 xer_so;
+ uint8 xer_ov;
// thread remaining cycles
sint32 remainingCycles; // if this value goes below zero, the next thread is scheduled
sint32 skippedCycles; // number of skipped cycles
- struct
+ struct
{
uint32 LR;
uint32 CTR;
@@ -67,9 +67,10 @@ struct PPCInterpreter_t
uint32 reservedMemValue;
// temporary storage for recompiler
FPR_t temporaryFPR[8];
- uint32 temporaryGPR[4];
+ uint32 temporaryGPR[4]; // deprecated, refactor backend dependency on this away
+ uint32 temporaryGPR_reg[4];
// values below this are not used by Cafe OS usermode
- struct
+ struct
{
uint32 fpecr; // is this the same register as fpscr ?
uint32 DEC;
@@ -84,7 +85,7 @@ struct PPCInterpreter_t
// DMA
uint32 dmaU;
uint32 dmaL;
- // MMU
+ // MMU
uint32 dbatU[8];
uint32 dbatL[8];
uint32 ibatU[8];
@@ -92,6 +93,8 @@ struct PPCInterpreter_t
uint32 sr[16];
uint32 sdr1;
}sprExtended;
+ uint8 LSQE;
+ uint8 PSE;
// global CPU values
PPCInterpreterGlobal_t* global;
// interpreter control
diff --git a/src/Cafe/HW/Espresso/PPCTimer.cpp b/src/Cafe/HW/Espresso/PPCTimer.cpp
index c27c94ee..257973a6 100644
--- a/src/Cafe/HW/Espresso/PPCTimer.cpp
+++ b/src/Cafe/HW/Espresso/PPCTimer.cpp
@@ -1,5 +1,4 @@
#include "Cafe/HW/Espresso/Const.h"
-#include "asm/x64util.h"
#include "config/ActiveSettings.h"
#include "util/helpers/fspinlock.h"
#include "util/highresolutiontimer/HighResolutionTimer.h"
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp
new file mode 100644
index 00000000..cb71234d
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp
@@ -0,0 +1,1693 @@
+#include "BackendAArch64.h"
+
+#pragma push_macro("CSIZE")
+#undef CSIZE
+#include
+#pragma pop_macro("CSIZE")
+#include
+
+#include
+
+#include "../PPCRecompiler.h"
+#include "Common/precompiled.h"
+#include "Common/cpu_features.h"
+#include "HW/Espresso/Interpreter/PPCInterpreterInternal.h"
+#include "HW/Espresso/Interpreter/PPCInterpreterHelper.h"
+#include "HW/Espresso/PPCState.h"
+
+using namespace Xbyak_aarch64;
+
+constexpr uint32 TEMP_GPR_1_ID = 25; // register index behind TEMP_GPR1
+constexpr uint32 TEMP_GPR_2_ID = 26; // register index behind TEMP_GPR2 (and LR)
+constexpr uint32 PPC_RECOMPILER_INSTANCE_DATA_REG_ID = 27; // register index behind PPC_REC_INSTANCE_REG
+constexpr uint32 MEMORY_BASE_REG_ID = 28; // register index behind MEM_BASE_REG
+constexpr uint32 HCPU_REG_ID = 29; // register index behind HCPU_REG
+
+constexpr uint32 TEMP_FPR_ID = 31; // register index behind TEMP_FPR
+
+struct FPReg // bundles the V/Q/D/S/H/B register objects that are all constructed from one index
+{
+ explicit FPReg(size_t index)
+ : index(index), VReg(index), QReg(index), DReg(index), SReg(index), HReg(index), BReg(index)
+ {
+ }
+ const size_t index; // index every member below was constructed with
+ const VReg VReg;
+ const QReg QReg;
+ const DReg DReg;
+ const SReg SReg;
+ const HReg HReg;
+ const BReg BReg;
+};
+
+struct GPReg // bundles the X and W register objects that are both constructed from one index
+{
+ explicit GPReg(size_t index)
+ : index(index), XReg(index), WReg(index)
+ {
+ }
+ const size_t index; // index both members below were constructed with
+ const XReg XReg;
+ const WReg WReg;
+};
+
+static const XReg HCPU_REG{HCPU_REG_ID}, PPC_REC_INSTANCE_REG{PPC_RECOMPILER_INSTANCE_DATA_REG_ID}, MEM_BASE_REG{MEMORY_BASE_REG_ID}; // fixed registers built from the *_REG_ID constants
+static const GPReg TEMP_GPR1{TEMP_GPR_1_ID};
+static const GPReg TEMP_GPR2{TEMP_GPR_2_ID};
+static const GPReg LR{TEMP_GPR_2_ID}; // NOTE(review): built from the same index as TEMP_GPR2, so both name one register - confirm this aliasing is intended
+
+static const FPReg TEMP_FPR{TEMP_FPR_ID};
+
+static const util::Cpu s_cpu;
+
+class AArch64Allocator : public Allocator // forwards to one shared static allocator and can be told to stop freeing
+{
+ private:
+#ifdef XBYAK_USE_MMAP_ALLOCATOR
+ inline static MmapAllocator s_allocator; // shared backing allocator when XBYAK_USE_MMAP_ALLOCATOR is defined
+#else
+ inline static Allocator s_allocator; // shared backing allocator otherwise
+#endif
+ Allocator* m_allocatorImpl; // points at s_allocator; every call below is forwarded through it
+ bool m_freeDisabled = false; // while true, free() does nothing and useProtect() returns false
+
+ public:
+ AArch64Allocator()
+ : m_allocatorImpl(reinterpret_cast(&s_allocator)) {}
+
+ uint32* alloc(size_t size) override // forwards the request unchanged to the backing allocator
+ {
+ return m_allocatorImpl->alloc(size);
+ }
+
+ void setFreeDisabled(bool disabled) // switches the free()/useProtect() suppression on or off
+ {
+ m_freeDisabled = disabled;
+ }
+
+ void free(uint32* p) override // releases p through the backing allocator unless freeing is disabled
+ {
+ if (!m_freeDisabled)
+ m_allocatorImpl->free(p);
+ }
+
+ [[nodiscard]] bool useProtect() const override // false while freeing is disabled, otherwise the backing allocator's answer
+ {
+ return !m_freeDisabled && m_allocatorImpl->useProtect();
+ }
+};
+
+struct UnconditionalJumpInfo // pending jump that is always taken
+{
+ IMLSegment* target; // segment whose recorded start offset the jump branches to
+};
+
+struct ConditionalRegJumpInfo // pending jump that depends on a boolean held in a register
+{
+ IMLSegment* target; // segment whose recorded start offset the jump branches to
+ WReg regBool; // register tested by the emitted branch
+ bool mustBeTrue; // true: jump when regBool is set, false: jump when it is clear
+};
+
+struct NegativeRegValueJumpInfo // pending jump taken when bit 31 of a register is set
+{
+ IMLSegment* target; // segment whose recorded start offset the jump branches to
+ WReg regValue; // register whose bit 31 is tested
+};
+
+using JumpInfo = std::variant< // any one of the three pending-jump descriptions
+ UnconditionalJumpInfo,
+ ConditionalRegJumpInfo,
+ NegativeRegValueJumpInfo>;
+
+struct AArch64GenContext_t : CodeGenerator // code generator with per-IML-instruction emitters and deferred jump patching
+{
+ explicit AArch64GenContext_t(Allocator* allocator = nullptr);
+ void enterRecompilerCode();
+ void leaveRecompilerCode();
+
+ void r_name(IMLInstruction* imlInstruction);
+ void name_r(IMLInstruction* imlInstruction);
+ bool r_s32(IMLInstruction* imlInstruction);
+ bool r_r(IMLInstruction* imlInstruction);
+ bool r_r_s32(IMLInstruction* imlInstruction);
+ bool r_r_s32_carry(IMLInstruction* imlInstruction);
+ bool r_r_r(IMLInstruction* imlInstruction);
+ bool r_r_r_carry(IMLInstruction* imlInstruction);
+ void compare(IMLInstruction* imlInstruction);
+ void compare_s32(IMLInstruction* imlInstruction);
+ bool load(IMLInstruction* imlInstruction, bool indexed);
+ bool store(IMLInstruction* imlInstruction, bool indexed);
+ void atomic_cmp_store(IMLInstruction* imlInstruction);
+ bool macro(IMLInstruction* imlInstruction);
+ void call_imm(IMLInstruction* imlInstruction);
+ bool fpr_load(IMLInstruction* imlInstruction, bool indexed);
+ bool fpr_store(IMLInstruction* imlInstruction, bool indexed);
+ void fpr_r_r(IMLInstruction* imlInstruction);
+ void fpr_r_r_r(IMLInstruction* imlInstruction);
+ void fpr_r_r_r_r(IMLInstruction* imlInstruction);
+ void fpr_r(IMLInstruction* imlInstruction);
+ void fpr_compare(IMLInstruction* imlInstruction);
+ void cjump(IMLInstruction* imlInstruction, IMLSegment* imlSegment);
+ void jump(IMLSegment* imlSegment);
+ void conditionalJumpCycleCheck(IMLSegment* imlSegment);
+
+ static constexpr size_t MAX_JUMP_INSTR_COUNT = 2; // number of instruction slots reserved per pending jump
+ std::list> jumps; // pending jumps: code offset of the reserved slots plus the jump description
+ void prepareJump(JumpInfo&& jumpInfo) // records the jump at the current offset and reserves its slots with nops
+ {
+ jumps.emplace_back(getSize(), jumpInfo);
+ for (int i = 0; i < MAX_JUMP_INSTR_COUNT; ++i)
+ nop();
+ }
+
+ std::map segmentStarts; // code offset at which each segment begins
+ void storeSegmentStart(IMLSegment* imlSegment) // remembers the current code offset as the start of imlSegment
+ {
+ segmentStarts[imlSegment] = getSize();
+ }
+
+ bool processAllJumps() // overwrites every reserved slot with the real branch; false as soon as one target is out of range
+ {
+ for (auto&& [jumpStart, jumpInfo] : jumps)
+ {
+ bool success = std::visit(
+ [&, this](const auto& jump) {
+ setSize(jumpStart); // rewind the write position to the reserved slots of this jump
+ sint64 targetAddress = segmentStarts.at(jump.target);
+ sint64 addressOffset = targetAddress - jumpStart; // byte offset relative to the first reserved slot
+ return handleJump(addressOffset, jump);
+ },
+ jumpInfo);
+ if (!success)
+ {
+ return false;
+ }
+ }
+ return true; // NOTE(review): the write position is left just past the last patched jump - confirm callers do not rely on getSize() afterwards
+ }
+
+ bool handleJump(sint64 addressOffset, const UnconditionalJumpInfo& jump) // emits a plain branch; false if the offset does not fit
+ {
+ // in +/-128MB
+ if (-0x8000000 <= addressOffset && addressOffset <= 0x7ffffff)
+ {
+ b(addressOffset);
+ return true;
+ }
+
+ cemu_assert_suspicious();
+
+ return false;
+ }
+
+ bool handleJump(sint64 addressOffset, const ConditionalRegJumpInfo& jump) // picks the shortest encoding that reaches the target; false if none does
+ {
+ bool mustBeTrue = jump.mustBeTrue;
+
+ // in +/-32KB
+ if (-0x8000 <= addressOffset && addressOffset <= 0x7fff)
+ {
+ if (mustBeTrue)
+ tbnz(jump.regBool, 0, addressOffset);
+ else
+ tbz(jump.regBool, 0, addressOffset);
+ return true;
+ }
+
+ // in +/-1MB
+ if (-0x100000 <= addressOffset && addressOffset <= 0xfffff)
+ {
+ if (mustBeTrue)
+ cbnz(jump.regBool, addressOffset);
+ else
+ cbz(jump.regBool, addressOffset);
+ return true;
+ }
+
+ Label skipJump;
+ if (mustBeTrue)
+ tbz(jump.regBool, 0, skipJump); // inverted test: hop over the long branch when the jump must not be taken
+ else
+ tbnz(jump.regBool, 0, skipJump);
+ addressOffset -= 4; // the long branch sits one instruction after the slot start the offset was computed from
+
+ // in +/-128MB
+ if (-0x8000000 <= addressOffset && addressOffset <= 0x7ffffff)
+ {
+ b(addressOffset);
+ L(skipJump);
+ return true;
+ }
+
+ cemu_assert_suspicious();
+
+ return false;
+ }
+
+ bool handleJump(sint64 addressOffset, const NegativeRegValueJumpInfo& jump) // branches when bit 31 of regValue is set; false if the target is out of range
+ {
+ // in +/-32KB
+ if (-0x8000 <= addressOffset && addressOffset <= 0x7fff)
+ {
+ tbnz(jump.regValue, 31, addressOffset);
+ return true;
+ }
+
+ // in +/-1MB
+ if (-0x100000 <= addressOffset && addressOffset <= 0xfffff)
+ {
+ tst(jump.regValue, 0x80000000);
+ addressOffset -= 4; // the branch is the second instruction of the slot
+ bne(addressOffset);
+ return true;
+ }
+
+ Label skipJump;
+ tbz(jump.regValue, 31, skipJump); // bit 31 clear: hop over the long branch
+ addressOffset -= 4; // the long branch sits one instruction after the slot start
+
+ // in +/-128MB
+ if (-0x8000000 <= addressOffset && addressOffset <= 0x7ffffff)
+ {
+ b(addressOffset);
+ L(skipJump);
+ return true;
+ }
+
+ cemu_assert_suspicious();
+
+ return false;
+ }
+};
+
+template T>
+T fpReg(const IMLReg& imlReg) // builds a T from the zero-based FPR index encoded in imlReg
+{
+ cemu_assert_debug(imlReg.GetRegFormat() == IMLRegFormat::F64);
+ auto regId = imlReg.GetRegID();
+ cemu_assert_debug(regId >= IMLArchAArch64::PHYSREG_FPR_BASE && regId < IMLArchAArch64::PHYSREG_FPR_BASE + IMLArchAArch64::PHYSREG_FPR_COUNT); // id must lie inside the physical FPR range
+ return T(regId - IMLArchAArch64::PHYSREG_FPR_BASE);
+}
+
+template T>
+T gpReg(const IMLReg& imlReg) // builds a T from the zero-based GPR index encoded in imlReg
+{
+ auto regFormat = imlReg.GetRegFormat();
+ if (std::is_same_v)
+ cemu_assert_debug(regFormat == IMLRegFormat::I32); // first accepted T expects a 32-bit IML register
+ else if (std::is_same_v)
+ cemu_assert_debug(regFormat == IMLRegFormat::I64); // second accepted T expects a 64-bit IML register
+ else
+ cemu_assert_unimplemented();
+
+ auto regId = imlReg.GetRegID();
+ cemu_assert_debug(regId >= IMLArchAArch64::PHYSREG_GPR_BASE && regId < IMLArchAArch64::PHYSREG_GPR_BASE + IMLArchAArch64::PHYSREG_GPR_COUNT); // id must lie inside the physical GPR range
+ return T(regId - IMLArchAArch64::PHYSREG_GPR_BASE);
+}
+
+template To, std::derived_from From>
+To aliasAs(const From& reg) // returns a To constructed from the same index as reg
+{
+ return To(reg.getIdx());
+}
+
+template To, std::derived_from From>
+To aliasAs(const From& reg) // second overload, same body: returns a To constructed from the same index as reg
+{
+ return To(reg.getIdx());
+}
+
+AArch64GenContext_t::AArch64GenContext_t(Allocator* allocator) // passes the default max code size, the AutoGrow mode and the given allocator to CodeGenerator
+ : CodeGenerator(DEFAULT_MAX_CODE_SIZE, AutoGrow, allocator)
+{
+}
+
+constexpr uint64 ones(uint32 size) // mask with the lowest `size` bits set; 64 yields all bits
+{
+	return (size != 64) ? (((uint64)1 << size) - 1) : ~(uint64)0;
+}
+
+constexpr bool isAdrImmValidFPR(sint32 imm, uint32 bits) // imm must be non-negative, at most 4095 * (bits / 8) and aligned to the access size
+{
+	const uint32 accessBytes = bits / 8;
+	const bool inRange = imm >= 0 && imm <= 4095 * accessBytes;
+	return inRange && ((uint64)imm & ones(std::countr_zero(accessBytes))) == 0;
+}
+
+// Checks a GPR load/store displacement for an access of `bits` bits (32 by default):
+// imm must be non-negative, at most 4095 * (1 << log2Bytes), and have its low log2Bytes bits clear,
+// where log2Bytes = countr_zero(bits / 8).
+constexpr bool isAdrImmValidGPR(sint32 imm, uint32 bits = 32)
+{
+	const uint32 log2Bytes = std::countr_zero(bits / 8u);
+	const sint32 scale = 1 << log2Bytes;
+	if (imm < 0)
+		return false;
+	if (imm > 4095 * scale)
+		return false;
+	return ((uint64)imm & ones(log2Bytes)) == 0;
+}
+
+// Applies `check(offset, bits)` to every offset from rangeStart up to and including rangeStart + rangeOffset,
+// advancing by bits / 8 each step. Returns false at the first offset the check rejects, true otherwise.
+constexpr bool isAdrImmRangeValid(sint32 rangeStart, sint32 rangeOffset, sint32 bits, std::invocable auto check)
+{
+	const sint32 lastOffset = rangeStart + rangeOffset;
+	const sint32 stride = bits / 8;
+	sint32 current = rangeStart;
+	while (current <= lastOffset)
+	{
+		if (!check(current, bits))
+			return false;
+		current += stride;
+	}
+	return true;
+}
+
+constexpr bool isAdrImmRangeValidGPR(sint32 rangeStart, sint32 rangeOffset, sint32 bits = 32) // Range check for GPR accesses; bits defaults to 32
+{
+ return isAdrImmRangeValid(rangeStart, rangeOffset, bits, isAdrImmValidGPR); // every stepped offset is tested with isAdrImmValidGPR
+}
+
+constexpr bool isAdrImmRangeValidFpr(sint32 rangeStart, sint32 rangeOffset, sint32 bits) // Range check for FPR accesses; the access width has no default here
+{
+ return isAdrImmRangeValid(rangeStart, rangeOffset, bits, isAdrImmValidFPR); // every stepped offset is tested with isAdrImmValidFPR
+}
+
+// Compile-time verification that the PPCInterpreter_t member offsets used by r_name/name_r are accepted by the isAdrImmValid* helpers, i.e. can be used as AdrUimm immediates
+static_assert(isAdrImmRangeValidGPR(offsetof(PPCInterpreter_t, gpr), sizeof(uint32) * 31)); // gpr[0] .. gpr[31] in 32-bit steps
+static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, spr.LR)));
+static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, spr.CTR)));
+static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, spr.XER)));
+static_assert(isAdrImmRangeValidGPR(offsetof(PPCInterpreter_t, spr.UGQR), sizeof(PPCInterpreter_t::spr.UGQR[0]) * (SPR_UGQR7 - SPR_UGQR0))); // UGQR0 .. UGQR7
+static_assert(isAdrImmRangeValidGPR(offsetof(PPCInterpreter_t, temporaryGPR_reg), sizeof(uint32) * 3)); // four temporaries, matching the `PPCREC_NAME_TEMPORARY + 4` bound in r_name/name_r
+static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, xer_ca), 8)); // checked as an 8-bit access (ldrb/strb in r_name/name_r)
+static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, xer_so), 8)); // checked as an 8-bit access (ldrb/strb in r_name/name_r)
+static_assert(isAdrImmRangeValidGPR(offsetof(PPCInterpreter_t, cr), PPCREC_NAME_CR_LAST - PPCREC_NAME_CR, 8)); // one byte per CR name
+static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, reservedMemAddr)));
+static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, reservedMemValue)));
+static_assert(isAdrImmRangeValidFpr(offsetof(PPCInterpreter_t, fpr), sizeof(FPR_t) * 63, 64)); // NOTE(review): r_name/name_r only generate offsets up to sizeof(FPR_t) * 31 + sizeof(double); this range looks wider than needed - confirm intent
+static_assert(isAdrImmRangeValidFpr(offsetof(PPCInterpreter_t, temporaryFPR), sizeof(FPR_t) * 7, 128)); // eight temporaries, stepped by 16 bytes
+
+// Emits a load that copies the PPCInterpreter_t field selected by op_r_name.name into the host register
+// mapped to op_r_name.regR. All accesses are relative to HCPU_REG. Names or register formats that are not
+// handled hit cemu_assert_suspicious() and no load is emitted.
+void AArch64GenContext_t::r_name(IMLInstruction* imlInstruction)
+{
+	const uint32 name = imlInstruction->op_r_name.name;
+	const auto baseFormat = imlInstruction->op_r_name.regR.GetBaseFormat();
+
+	if (baseFormat == IMLRegFormat::I64)
+	{
+		XReg hostReg64 = gpReg(imlInstruction->op_r_name.regR);
+		WReg hostReg = aliasAs(hostReg64); // the fields below are accessed through the 32-bit view
+
+		// gpr array, 32-bit entries
+		if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32)
+		{
+			ldr(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0)));
+			return;
+		}
+		// special purpose registers: only LR, CTR, XER and UGQR0..UGQR7 are supported
+		if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999)
+		{
+			const uint32 sprIndex = name - PPCREC_NAME_SPR0;
+			if (sprIndex == SPR_LR)
+			{
+				ldr(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.LR)));
+				return;
+			}
+			if (sprIndex == SPR_CTR)
+			{
+				ldr(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.CTR)));
+				return;
+			}
+			if (sprIndex == SPR_XER)
+			{
+				ldr(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.XER)));
+				return;
+			}
+			if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
+			{
+				ldr(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0)));
+				return;
+			}
+			cemu_assert_suspicious();
+			return;
+		}
+		// temporaryGPR_reg array, 32-bit entries
+		if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
+		{
+			ldr(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)));
+			return;
+		}
+		// byte-sized fields are read with ldrb
+		if (name == PPCREC_NAME_XER_CA)
+		{
+			ldrb(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, xer_ca)));
+			return;
+		}
+		if (name == PPCREC_NAME_XER_SO)
+		{
+			ldrb(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, xer_so)));
+			return;
+		}
+		if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
+		{
+			ldrb(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR)));
+			return;
+		}
+		// reservation address / value
+		if (name == PPCREC_NAME_CPU_MEMRES_EA)
+		{
+			ldr(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, reservedMemAddr)));
+			return;
+		}
+		if (name == PPCREC_NAME_CPU_MEMRES_VAL)
+		{
+			ldr(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, reservedMemValue)));
+			return;
+		}
+		cemu_assert_suspicious();
+		return;
+	}
+
+	if (baseFormat == IMLRegFormat::F64)
+	{
+		auto imlRegR = imlInstruction->op_r_name.regR;
+
+		if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64))
+		{
+			// two consecutive names address the two double-sized halves of one FPR_t
+			const uint32 halfIndex = name - PPCREC_NAME_FPR_HALF;
+			uint32 offset = offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (halfIndex / 2);
+			if (halfIndex % 2)
+				offset += sizeof(double);
+			ldr(fpReg(imlRegR), AdrUimm(HCPU_REG, offset));
+			return;
+		}
+		if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
+		{
+			ldr(fpReg(imlRegR), AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0)));
+			return;
+		}
+		cemu_assert_suspicious();
+		return;
+	}
+
+	// neither I64 nor F64 base format
+	cemu_assert_suspicious();
+}
+
+// Emits a store that writes the host register mapped to op_r_name.regR into the PPCInterpreter_t field
+// selected by op_r_name.name. All accesses are relative to HCPU_REG. Names or register formats that are not
+// handled hit cemu_assert_suspicious() and no store is emitted.
+void AArch64GenContext_t::name_r(IMLInstruction* imlInstruction)
+{
+	const uint32 name = imlInstruction->op_r_name.name;
+	const auto baseFormat = imlInstruction->op_r_name.regR.GetBaseFormat();
+
+	if (baseFormat == IMLRegFormat::I64)
+	{
+		XReg hostReg64 = gpReg(imlInstruction->op_r_name.regR);
+		WReg hostReg = aliasAs(hostReg64); // the fields below are accessed through the 32-bit view
+
+		// gpr array, 32-bit entries
+		if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32)
+		{
+			str(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0)));
+			return;
+		}
+		// special purpose registers: only LR, CTR, XER and UGQR0..UGQR7 are supported
+		if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999)
+		{
+			const uint32 sprIndex = name - PPCREC_NAME_SPR0;
+			if (sprIndex == SPR_LR)
+			{
+				str(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.LR)));
+				return;
+			}
+			if (sprIndex == SPR_CTR)
+			{
+				str(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.CTR)));
+				return;
+			}
+			if (sprIndex == SPR_XER)
+			{
+				str(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.XER)));
+				return;
+			}
+			if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
+			{
+				str(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0)));
+				return;
+			}
+			cemu_assert_suspicious();
+			return;
+		}
+		// temporaryGPR_reg array, 32-bit entries
+		if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
+		{
+			str(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)));
+			return;
+		}
+		// byte-sized fields are written with strb
+		if (name == PPCREC_NAME_XER_CA)
+		{
+			strb(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, xer_ca)));
+			return;
+		}
+		if (name == PPCREC_NAME_XER_SO)
+		{
+			strb(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, xer_so)));
+			return;
+		}
+		if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
+		{
+			strb(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR)));
+			return;
+		}
+		// reservation address / value
+		if (name == PPCREC_NAME_CPU_MEMRES_EA)
+		{
+			str(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, reservedMemAddr)));
+			return;
+		}
+		if (name == PPCREC_NAME_CPU_MEMRES_VAL)
+		{
+			str(hostReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, reservedMemValue)));
+			return;
+		}
+		cemu_assert_suspicious();
+		return;
+	}
+
+	if (baseFormat == IMLRegFormat::F64)
+	{
+		auto imlRegR = imlInstruction->op_r_name.regR;
+
+		if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64))
+		{
+			// two consecutive names address the two double-sized halves of one FPR_t
+			const uint32 halfIndex = name - PPCREC_NAME_FPR_HALF;
+			const sint32 offset = offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (halfIndex / 2) + (halfIndex % 2) * sizeof(double);
+			str(fpReg(imlRegR), AdrUimm(HCPU_REG, offset));
+			return;
+		}
+		if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
+		{
+			str(fpReg(imlRegR), AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0)));
+			return;
+		}
+		cemu_assert_suspicious();
+		return;
+	}
+
+	// neither I64 nor F64 base format
+	cemu_assert_suspicious();
+}
+
+// Emits a two-register integer operation (regR = op(regA)) on the 32-bit host registers.
+// Returns false, after logging, when the operation is not supported; true otherwise.
+bool AArch64GenContext_t::r_r(IMLInstruction* imlInstruction)
+{
+	WReg dstReg = gpReg(imlInstruction->op_r_r.regR);
+	WReg srcReg = gpReg(imlInstruction->op_r_r.regA);
+
+	switch (imlInstruction->operation)
+	{
+	case PPCREC_IML_OP_ASSIGN:
+		mov(dstReg, srcReg);
+		break;
+	case PPCREC_IML_OP_ENDIAN_SWAP:
+		rev(dstReg, srcReg);
+		break;
+	case PPCREC_IML_OP_ASSIGN_S8_TO_S32:
+		sxtb(dstReg, srcReg);
+		break;
+	case PPCREC_IML_OP_ASSIGN_S16_TO_S32:
+		sxth(dstReg, srcReg);
+		break;
+	case PPCREC_IML_OP_NOT:
+		mvn(dstReg, srcReg);
+		break;
+	case PPCREC_IML_OP_NEG:
+		neg(dstReg, srcReg);
+		break;
+	case PPCREC_IML_OP_CNTLZW:
+		clz(dstReg, srcReg);
+		break;
+	default:
+		cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_r_r(): Unsupported operation {:x}", imlInstruction->operation);
+		return false;
+	}
+	return true;
+}
+
+// Emits a register/immediate integer operation on the 32-bit host register mapped to op_r_immS32.regR.
+// Supported operations: ASSIGN (load the immediate) and LEFT_ROTATE (rotate the register left in place).
+// Returns false, after logging, for any other operation; true otherwise.
+bool AArch64GenContext_t::r_s32(IMLInstruction* imlInstruction)
+{
+	sint32 imm32 = imlInstruction->op_r_immS32.immS32;
+	WReg reg = gpReg(imlInstruction->op_r_immS32.regR);
+
+	if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)
+	{
+		mov(reg, imm32);
+	}
+	else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE)
+	{
+		// A left rotate by n is a right rotate by (32 - n) mod 32. The final mask matters for n == 0:
+		// without it the amount would be 32, which is outside the 0..31 range of the 32-bit ROR immediate.
+		ror(reg, reg, (32 - (imm32 & 0x1f)) & 0x1f);
+	}
+	else
+	{
+		cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_r_s32(): Unsupported operation {:x}", imlInstruction->operation);
+		return false;
+	}
+	return true;
+}
+
+// Emits regR = regA <op> immS32 on 32-bit host registers.
+// Operations without an immediate form here first move the immediate into TEMP_GPR1; shift amounts use the low 5 bits.
+// Returns false (after logging and cemu_assert_suspicious) for an unsupported operation; true otherwise.
+bool AArch64GenContext_t::r_r_s32(IMLInstruction* imlInstruction)
+{
+	WReg dstReg = gpReg(imlInstruction->op_r_r_s32.regR);
+	WReg srcReg = gpReg(imlInstruction->op_r_r_s32.regA);
+	const sint32 imm = imlInstruction->op_r_r_s32.immS32;
+	const uint32 shiftAmount = (uint32)imm & 0x1f;
+
+	switch (imlInstruction->operation)
+	{
+	case PPCREC_IML_OP_ADD:
+		add_imm(dstReg, srcReg, imm, TEMP_GPR1.WReg);
+		break;
+	case PPCREC_IML_OP_SUB:
+		sub_imm(dstReg, srcReg, imm, TEMP_GPR1.WReg);
+		break;
+	case PPCREC_IML_OP_AND:
+		mov(TEMP_GPR1.WReg, imm);
+		and_(dstReg, srcReg, TEMP_GPR1.WReg);
+		break;
+	case PPCREC_IML_OP_OR:
+		mov(TEMP_GPR1.WReg, imm);
+		orr(dstReg, srcReg, TEMP_GPR1.WReg);
+		break;
+	case PPCREC_IML_OP_XOR:
+		mov(TEMP_GPR1.WReg, imm);
+		eor(dstReg, srcReg, TEMP_GPR1.WReg);
+		break;
+	case PPCREC_IML_OP_MULTIPLY_SIGNED:
+		mov(TEMP_GPR1.WReg, imm);
+		mul(dstReg, srcReg, TEMP_GPR1.WReg);
+		break;
+	case PPCREC_IML_OP_LEFT_SHIFT:
+		lsl(dstReg, srcReg, shiftAmount);
+		break;
+	case PPCREC_IML_OP_RIGHT_SHIFT_U:
+		lsr(dstReg, srcReg, shiftAmount);
+		break;
+	case PPCREC_IML_OP_RIGHT_SHIFT_S:
+		asr(dstReg, srcReg, shiftAmount);
+		break;
+	default:
+		cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_r_r_s32(): Unsupported operation {:x}", imlInstruction->operation);
+		cemu_assert_suspicious();
+		return false;
+	}
+	return true;
+}
+
+// Emits regR = regA + immS32 with carry tracking in regCarry.
+// ADD: plain flag-setting add. ADD_WITH_CARRY: regCarry is first turned into the host carry flag, then added in.
+// In both cases regCarry is set from the resulting carry flag. Unsupported operations assert and return false.
+bool AArch64GenContext_t::r_r_s32_carry(IMLInstruction* imlInstruction)
+{
+	WReg dstReg = gpReg(imlInstruction->op_r_r_s32_carry.regR);
+	WReg srcReg = gpReg(imlInstruction->op_r_r_s32_carry.regA);
+	WReg carryReg = gpReg(imlInstruction->op_r_r_s32_carry.regCarry);
+	const sint32 imm = imlInstruction->op_r_r_s32_carry.immS32;
+
+	switch (imlInstruction->operation)
+	{
+	case PPCREC_IML_OP_ADD:
+		adds_imm(dstReg, srcReg, imm, TEMP_GPR1.WReg);
+		break;
+	case PPCREC_IML_OP_ADD_WITH_CARRY:
+		mov(TEMP_GPR1.WReg, imm);
+		cmp(carryReg, 1); // sets the carry flag when carryReg >= 1 (unsigned compare)
+		adcs(dstReg, srcReg, TEMP_GPR1.WReg);
+		break;
+	default:
+		cemu_assert_suspicious();
+		return false;
+	}
+
+	// carry-out of the addition becomes the new regCarry value
+	cset(carryReg, Cond::CS);
+	return true;
+}
+
+// Emits regR = regA <op> regB on 32-bit host registers.
+// Returns false, after logging, when the operation is not supported; true otherwise.
+bool AArch64GenContext_t::r_r_r(IMLInstruction* imlInstruction)
+{
+	WReg dstReg = gpReg(imlInstruction->op_r_r_r.regR);
+	XReg dstReg64 = aliasAs(dstReg); // 64-bit view of the destination, used by the multiply-high variants
+	WReg lhsReg = gpReg(imlInstruction->op_r_r_r.regA);
+	WReg rhsReg = gpReg(imlInstruction->op_r_r_r.regB);
+
+	switch (imlInstruction->operation)
+	{
+	case PPCREC_IML_OP_ADD:
+		add(dstReg, lhsReg, rhsReg);
+		break;
+	case PPCREC_IML_OP_SUB:
+		sub(dstReg, lhsReg, rhsReg);
+		break;
+	case PPCREC_IML_OP_OR:
+		orr(dstReg, lhsReg, rhsReg);
+		break;
+	case PPCREC_IML_OP_AND:
+		and_(dstReg, lhsReg, rhsReg);
+		break;
+	case PPCREC_IML_OP_XOR:
+		eor(dstReg, lhsReg, rhsReg);
+		break;
+	case PPCREC_IML_OP_MULTIPLY_SIGNED:
+		mul(dstReg, lhsReg, rhsReg);
+		break;
+	case PPCREC_IML_OP_SLW:
+		// bit 5 of the shift amount is tested before the shift; if it is set the result is replaced by zero
+		tst(rhsReg, 32);
+		lsl(dstReg, lhsReg, rhsReg);
+		csel(dstReg, dstReg, wzr, Cond::EQ);
+		break;
+	case PPCREC_IML_OP_SRW:
+		// same scheme as SLW, with a logical right shift
+		tst(rhsReg, 32);
+		lsr(dstReg, lhsReg, rhsReg);
+		csel(dstReg, dstReg, wzr, Cond::EQ);
+		break;
+	case PPCREC_IML_OP_LEFT_ROTATE:
+		// rotate left is emitted as a rotate right by the negated amount
+		neg(TEMP_GPR1.WReg, rhsReg);
+		ror(dstReg, lhsReg, TEMP_GPR1.WReg);
+		break;
+	case PPCREC_IML_OP_RIGHT_SHIFT_S:
+		asr(dstReg, lhsReg, rhsReg);
+		break;
+	case PPCREC_IML_OP_RIGHT_SHIFT_U:
+		lsr(dstReg, lhsReg, rhsReg);
+		break;
+	case PPCREC_IML_OP_LEFT_SHIFT:
+		lsl(dstReg, lhsReg, rhsReg);
+		break;
+	case PPCREC_IML_OP_DIVIDE_SIGNED:
+		sdiv(dstReg, lhsReg, rhsReg);
+		break;
+	case PPCREC_IML_OP_DIVIDE_UNSIGNED:
+		udiv(dstReg, lhsReg, rhsReg);
+		break;
+	case PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED:
+		// 64-bit product, then keep the upper 32 bits
+		smull(dstReg64, lhsReg, rhsReg);
+		lsr(dstReg64, dstReg64, 32);
+		break;
+	case PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED:
+		// 64-bit product, then keep the upper 32 bits
+		umull(dstReg64, lhsReg, rhsReg);
+		lsr(dstReg64, dstReg64, 32);
+		break;
+	default:
+		cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_r_r_r(): Unsupported operation {:x}", imlInstruction->operation);
+		return false;
+	}
+	return true;
+}
+
+// Emits regR = regA + regB with carry tracking in regCarry.
+// ADD: plain flag-setting add. ADD_WITH_CARRY: regCarry is first turned into the host carry flag, then added in.
+// In both cases regCarry is set from the resulting carry flag. Unsupported operations assert and return false.
+bool AArch64GenContext_t::r_r_r_carry(IMLInstruction* imlInstruction)
+{
+	WReg dstReg = gpReg(imlInstruction->op_r_r_r_carry.regR);
+	WReg lhsReg = gpReg(imlInstruction->op_r_r_r_carry.regA);
+	WReg rhsReg = gpReg(imlInstruction->op_r_r_r_carry.regB);
+	WReg carryReg = gpReg(imlInstruction->op_r_r_r_carry.regCarry);
+
+	switch (imlInstruction->operation)
+	{
+	case PPCREC_IML_OP_ADD:
+		adds(dstReg, lhsReg, rhsReg);
+		break;
+	case PPCREC_IML_OP_ADD_WITH_CARRY:
+		cmp(carryReg, 1); // sets the carry flag when carryReg >= 1 (unsigned compare)
+		adcs(dstReg, lhsReg, rhsReg);
+		break;
+	default:
+		cemu_assert_suspicious();
+		return false;
+	}
+
+	// carry-out of the addition becomes the new regCarry value
+	cset(carryReg, Cond::CS);
+	return true;
+}
+
+// Translates an integer IML comparison condition into the matching host condition code.
+// Conditions that are not listed trigger cemu_assert_suspicious() and fall back to Cond::EQ.
+Cond ImlCondToArm64Cond(IMLCondition condition)
+{
+	if (condition == IMLCondition::EQ)
+		return Cond::EQ;
+	if (condition == IMLCondition::NEQ)
+		return Cond::NE;
+	if (condition == IMLCondition::UNSIGNED_GT)
+		return Cond::HI;
+	if (condition == IMLCondition::UNSIGNED_LT)
+		return Cond::LO;
+	if (condition == IMLCondition::SIGNED_GT)
+		return Cond::GT;
+	if (condition == IMLCondition::SIGNED_LT)
+		return Cond::LT;
+
+	cemu_assert_suspicious();
+	return Cond::EQ;
+}
+
+// Emits a register/register integer compare and writes the outcome of op_compare.cond as 0/1 into regR.
+void AArch64GenContext_t::compare(IMLInstruction* imlInstruction)
+{
+	WReg resultReg = gpReg(imlInstruction->op_compare.regR);
+	WReg lhsReg = gpReg(imlInstruction->op_compare.regA);
+	WReg rhsReg = gpReg(imlInstruction->op_compare.regB);
+	const Cond hostCond = ImlCondToArm64Cond(imlInstruction->op_compare.cond);
+
+	cmp(lhsReg, rhsReg);
+	cset(resultReg, hostCond);
+}
+
+// Emits a register/immediate integer compare and writes the outcome of op_compare_s32.cond as 0/1 into regR.
+// All operands are read from op_compare_s32, the member that also carries the immediate, so the code does not
+// depend on op_compare and op_compare_s32 sharing a field layout.
+void AArch64GenContext_t::compare_s32(IMLInstruction* imlInstruction)
+{
+	WReg regR = gpReg(imlInstruction->op_compare_s32.regR);
+	WReg regA = gpReg(imlInstruction->op_compare_s32.regA);
+	sint32 imm = imlInstruction->op_compare_s32.immS32;
+	auto cond = ImlCondToArm64Cond(imlInstruction->op_compare_s32.cond);
+	// TEMP_GPR1 is handed to cmp_imm as its scratch register
+	cmp_imm(regA, imm, TEMP_GPR1.WReg);
+	cset(regR, cond);
+}
+
+// Registers a conditional jump to the segment's branch-taken successor, taken when the boolean register
+// matches op_conditional_jump.mustBeTrue.
+void AArch64GenContext_t::cjump(IMLInstruction* imlInstruction, IMLSegment* imlSegment)
+{
+	auto conditionReg = gpReg(imlInstruction->op_conditional_jump.registerBool);
+	auto* const takenSegment = imlSegment->nextSegmentBranchTaken;
+	const bool expectTrue = imlInstruction->op_conditional_jump.mustBeTrue;
+	prepareJump(ConditionalRegJumpInfo{
+		.target = takenSegment,
+		.regBool = conditionReg,
+		.mustBeTrue = expectTrue,
+	});
+}
+
+void AArch64GenContext_t::jump(IMLSegment* imlSegment) // Registers an unconditional jump for this segment
+{
+ prepareJump(UnconditionalJumpInfo{.target = imlSegment->nextSegmentBranchTaken}); // the target is the segment's branch-taken successor
+}
+
+// Loads PPCInterpreter_t::remainingCycles into TEMP_GPR1 and registers a jump to the segment's branch-taken
+// successor that depends on the sign of that value (NegativeRegValueJumpInfo).
+void AArch64GenContext_t::conditionalJumpCycleCheck(IMLSegment* imlSegment)
+{
+	const AdrUimm remainingCyclesAdr = AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, remainingCycles));
+	ldr(TEMP_GPR1.WReg, remainingCyclesAdr);
+
+	auto* const takenSegment = imlSegment->nextSegmentBranchTaken;
+	prepareJump(NegativeRegValueJumpInfo{
+		.target = takenSegment,
+		.regValue = TEMP_GPR1.WReg,
+	});
+}
+
+// Helper called from generated code to run an HLE function on behalf of the recompiler.
+// Returns the current interpreter instance (PPCInterpreter_getCurrentInstance()).
+// For every id other than 0xFFD0 the registered HLE call is invoked and rspTemp is restored afterwards.
+void* PPCRecompiler_virtualHLE(PPCInterpreter_t* ppcInterpreter, uint32 hleFuncId)
+{
+	void* savedRspTemp = ppcInterpreter->rspTemp;
+	if (hleFuncId != 0xFFD0)
+	{
+		auto hleCall = PPCInterpreter_getHLECall(hleFuncId);
+		cemu_assert(hleCall != nullptr);
+		hleCall(ppcInterpreter);
+		ppcInterpreter->rspTemp = savedRspTemp;
+		return PPCInterpreter_getCurrentInstance();
+	}
+
+	// id 0xFFD0 is handled inline: charge cycles, clear gpr[3] and advance to the next instruction
+	ppcInterpreter->remainingCycles -= 500; // subtract about 500 cycles for each HLE call
+	ppcInterpreter->gpr[3] = 0;
+	PPCInterpreter_nextInstruction(ppcInterpreter);
+	return PPCInterpreter_getCurrentInstance();
+}
+
+// Emits the code sequence for an IML macro instruction (indirect/far branches, recompiler exit, cycle
+// accounting, HLE calls, debug break).
+// Branch targets are looked up in PPCRecompilerInstanceData_t::ppcRecompilerDirectJumpTable relative to
+// PPC_REC_INSTANCE_REG; the table byte offset of a PPC address is address * 2, and entry 0 is the recompiler exit.
+// The PPC address being branched to (or left from) is placed in LR.WReg before the host branch.
+// Returns true when code was emitted, false (after logging and cemu_assert_suspicious) for an unknown macro.
+bool AArch64GenContext_t::macro(IMLInstruction* imlInstruction)
+{
+	if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG)
+	{
+		WReg branchDstReg = gpReg(imlInstruction->op_macro.paramReg);
+
+		// NOTE(review): this offset is computed with 32-bit arithmetic, while the HLE path below uses 64-bit
+		// registers for the same lookup - confirm that table offset + address * 2 cannot wrap here
+		mov(TEMP_GPR1.WReg, offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
+		add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, branchDstReg, ShMod::LSL, 1);
+		ldr(TEMP_GPR1.XReg, AdrExt(PPC_REC_INSTANCE_REG, TEMP_GPR1.WReg, ExtMod::UXTW));
+		mov(LR.WReg, branchDstReg);
+		br(TEMP_GPR1.XReg);
+		return true;
+	}
+	else if (imlInstruction->operation == PPCREC_IML_MACRO_BL)
+	{
+		// store the return address (param + 4) in the guest link register
+		uint32 newLR = imlInstruction->op_macro.param + 4;
+
+		mov(TEMP_GPR1.WReg, newLR);
+		str(TEMP_GPR1.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.LR)));
+
+		// jump through the table entry of the constant target address
+		uint32 newIP = imlInstruction->op_macro.param2;
+		uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL;
+		mov(TEMP_GPR1.XReg, lookupOffset);
+		ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg));
+		mov(LR.WReg, newIP);
+		br(TEMP_GPR1.XReg);
+		return true;
+	}
+	else if (imlInstruction->operation == PPCREC_IML_MACRO_B_FAR)
+	{
+		// same as BL without the link register update
+		uint32 newIP = imlInstruction->op_macro.param2;
+		uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL;
+		mov(TEMP_GPR1.XReg, lookupOffset);
+		ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg));
+		mov(LR.WReg, newIP);
+		br(TEMP_GPR1.XReg);
+		return true;
+	}
+	else if (imlInstruction->operation == PPCREC_IML_MACRO_LEAVE)
+	{
+		uint32 currentInstructionAddress = imlInstruction->op_macro.param;
+		mov(TEMP_GPR1.XReg, (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); // newIP = 0 special value for recompiler exit
+		ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg));
+		mov(LR.WReg, currentInstructionAddress);
+		br(TEMP_GPR1.XReg);
+		return true;
+	}
+	else if (imlInstruction->operation == PPCREC_IML_MACRO_DEBUGBREAK)
+	{
+		brk(0xf000);
+		return true;
+	}
+	else if (imlInstruction->operation == PPCREC_IML_MACRO_COUNT_CYCLES)
+	{
+		// remainingCycles -= param (read-modify-write through TEMP_GPR1, TEMP_GPR2 is the scratch for sub_imm)
+		uint32 cycleCount = imlInstruction->op_macro.param;
+		AdrUimm adrCycles = AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, remainingCycles));
+		ldr(TEMP_GPR1.WReg, adrCycles);
+		sub_imm(TEMP_GPR1.WReg, TEMP_GPR1.WReg, cycleCount, TEMP_GPR2.WReg);
+		str(TEMP_GPR1.WReg, adrCycles);
+		return true;
+	}
+	else if (imlInstruction->operation == PPCREC_IML_MACRO_HLE)
+	{
+		uint32 ppcAddress = imlInstruction->op_macro.param;
+		uint32 funcId = imlInstruction->op_macro.param2;
+		Label cyclesLeftLabel;
+
+		// update instruction pointer
+		mov(TEMP_GPR1.WReg, ppcAddress);
+		str(TEMP_GPR1.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, instructionPointer)));
+		// save x30 on the stack (16-byte push); it is overwritten by the blr below
+		str(x30, AdrPreImm(sp, -16));
+
+		// set parameters: x0 = interpreter instance, w1 = HLE function id
+		mov(x0, HCPU_REG);
+		mov(w1, funcId);
+		// call HLE function
+
+		mov(TEMP_GPR1.XReg, (uint64)PPCRecompiler_virtualHLE);
+		blr(TEMP_GPR1.XReg);
+
+		// the helper returns the current interpreter instance, which becomes the new HCPU_REG
+		mov(HCPU_REG, x0);
+
+		// restore x30 and pop the 16 bytes pushed above
+		ldr(x30, AdrPostImm(sp, 16));
+
+		// check if cycles were decreased beyond zero, if yes -> leave recompiler
+		ldr(TEMP_GPR1.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, remainingCycles)));
+		tbz(TEMP_GPR1.WReg, 31, cyclesLeftLabel); // check if negative
+
+		mov(TEMP_GPR1.XReg, offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
+		ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg));
+		ldr(LR.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, instructionPointer)));
+		// branch to recompiler exit
+		br(TEMP_GPR1.XReg);
+
+		L(cyclesLeftLabel);
+		// cycles remain: continue at whatever instruction pointer the HLE call left behind
+		// assign new instruction pointer to LR.WReg
+		ldr(LR.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, instructionPointer)));
+		mov(TEMP_GPR1.XReg, offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
+		add(TEMP_GPR1.XReg, TEMP_GPR1.XReg, LR.XReg, ShMod::LSL, 1);
+		ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg));
+		// branch to [ppcRecompilerDirectJumpTable + PPCInterpreter_t::instructionPointer * 2]
+		br(TEMP_GPR1.XReg);
+		return true;
+	}
+	else
+	{
+		// cemuLog_log takes {}-style placeholders (see the other log calls in this file); a printf-style %d is never substituted
+		cemuLog_log(LogType::Recompiler, "Unknown recompiler macro operation {}\n", imlInstruction->operation);
+		cemu_assert_suspicious();
+	}
+	return false;
+}
+
+// Emits an integer load from guest memory (MEM_BASE_REG + zero-extended 32-bit effective address) into registerData.
+// The effective address is registerMem + immS32 (+ registerMem2 when `indexed`), built in TEMP_GPR1.
+// Supports 32/16/8-bit widths with optional byte swap (32/16-bit) and sign extension (16/8-bit).
+// Returns false for any other width (the address computation has already been emitted by then); true otherwise.
+bool AArch64GenContext_t::load(IMLInstruction* imlInstruction, bool indexed)
+{
+	const auto& memOp = imlInstruction->op_storeLoad;
+	cemu_assert_debug(memOp.registerData.GetRegFormat() == IMLRegFormat::I32);
+	cemu_assert_debug(memOp.registerMem.GetRegFormat() == IMLRegFormat::I32);
+	if (indexed)
+		cemu_assert_debug(memOp.registerMem2.GetRegFormat() == IMLRegFormat::I32);
+
+	const sint32 immOffset = memOp.immS32;
+	const bool wantSignExtend = memOp.flags2.signExtend;
+	const bool wantByteSwap = memOp.flags2.swapEndian;
+	WReg addrBaseReg = gpReg(memOp.registerMem);
+	WReg valueReg = gpReg(memOp.registerData);
+
+	// effective address -> TEMP_GPR1
+	add_imm(TEMP_GPR1.WReg, addrBaseReg, immOffset, TEMP_GPR1.WReg);
+	if (indexed)
+		add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, gpReg(memOp.registerMem2));
+
+	auto guestAdr = AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW);
+	switch (memOp.copyWidth)
+	{
+	case 32:
+		ldr(valueReg, guestAdr);
+		if (wantByteSwap)
+			rev(valueReg, valueReg);
+		return true;
+	case 16:
+		if (!wantByteSwap)
+		{
+			if (wantSignExtend)
+				ldrsh(valueReg, guestAdr);
+			else
+				ldrh(valueReg, guestAdr);
+			return true;
+		}
+		// the 32-bit byte reversal leaves the swapped halfword in the upper 16 bits; shift it back down
+		ldrh(valueReg, guestAdr);
+		rev(valueReg, valueReg);
+		if (wantSignExtend)
+			asr(valueReg, valueReg, 16);
+		else
+			lsr(valueReg, valueReg, 16);
+		return true;
+	case 8:
+		if (wantSignExtend)
+			ldrsb(valueReg, guestAdr);
+		else
+			ldrb(valueReg, guestAdr);
+		return true;
+	default:
+		return false;
+	}
+}
+
+// Emits an integer store of registerData to guest memory (MEM_BASE_REG + zero-extended 32-bit effective address).
+// The effective address is registerMem + immS32 (+ registerMem2 when `indexed`), built in TEMP_GPR1.
+// Supports 32/16/8-bit widths; byte-swapped 32/16-bit values are prepared in TEMP_GPR2 so the source stays intact.
+// Returns false for any other width (the address computation has already been emitted by then); true otherwise.
+bool AArch64GenContext_t::store(IMLInstruction* imlInstruction, bool indexed)
+{
+	const auto& memOp = imlInstruction->op_storeLoad;
+	cemu_assert_debug(memOp.registerData.GetRegFormat() == IMLRegFormat::I32);
+	cemu_assert_debug(memOp.registerMem.GetRegFormat() == IMLRegFormat::I32);
+	if (indexed)
+		cemu_assert_debug(memOp.registerMem2.GetRegFormat() == IMLRegFormat::I32);
+
+	WReg valueReg = gpReg(memOp.registerData);
+	WReg addrBaseReg = gpReg(memOp.registerMem);
+	const sint32 immOffset = memOp.immS32;
+	const bool wantByteSwap = memOp.flags2.swapEndian;
+
+	// effective address -> TEMP_GPR1
+	add_imm(TEMP_GPR1.WReg, addrBaseReg, immOffset, TEMP_GPR1.WReg);
+	if (indexed)
+		add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, gpReg(memOp.registerMem2));
+	AdrExt guestAdr = AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW);
+
+	switch (memOp.copyWidth)
+	{
+	case 32:
+		if (!wantByteSwap)
+		{
+			str(valueReg, guestAdr);
+			return true;
+		}
+		rev(TEMP_GPR2.WReg, valueReg);
+		str(TEMP_GPR2.WReg, guestAdr);
+		return true;
+	case 16:
+		if (!wantByteSwap)
+		{
+			strh(valueReg, guestAdr);
+			return true;
+		}
+		// the 32-bit byte reversal leaves the swapped halfword in the upper 16 bits; shift it back down
+		rev(TEMP_GPR2.WReg, valueReg);
+		lsr(TEMP_GPR2.WReg, TEMP_GPR2.WReg, 16);
+		strh(TEMP_GPR2.WReg, guestAdr);
+		return true;
+	case 8:
+		strb(valueReg, guestAdr);
+		return true;
+	default:
+		return false;
+	}
+}
+
+void AArch64GenContext_t::atomic_cmp_store(IMLInstruction* imlInstruction) // Emits a 32-bit compare-and-store on guest memory; regBoolOut ends up 1 if memory matched regCompareValue, otherwise 0
+{
+ WReg outReg = gpReg(imlInstruction->op_atomic_compare_store.regBoolOut);
+ WReg eaReg = gpReg(imlInstruction->op_atomic_compare_store.regEA);
+ WReg valReg = gpReg(imlInstruction->op_atomic_compare_store.regWriteValue);
+ WReg cmpValReg = gpReg(imlInstruction->op_atomic_compare_store.regCompareValue);
+
+ if (s_cpu.isAtomicSupported()) // single-instruction path using casal
+ {
+ mov(TEMP_GPR2.WReg, cmpValReg); // casal works on a copy so cmpValReg still holds the expected value for the cmp below
+ add(TEMP_GPR1.XReg, MEM_BASE_REG, eaReg, ExtMod::UXTW); // host address = MEM_BASE_REG + zero-extended eaReg
+ casal(TEMP_GPR2.WReg, valReg, AdrNoOfs(TEMP_GPR1.XReg)); // the CAS instructions return the value read from memory in the compare register
+ cmp(TEMP_GPR2.WReg, cmpValReg); // equal means the compare matched
+ cset(outReg, Cond::EQ);
+ }
+ else // fallback: exclusive load/store retry loop
+ {
+ Label notEqual;
+ Label storeFailed;
+
+ add(TEMP_GPR1.XReg, MEM_BASE_REG, eaReg, ExtMod::UXTW); // host address = MEM_BASE_REG + zero-extended eaReg
+ L(storeFailed); // retry entry point
+ ldaxr(TEMP_GPR2.WReg, AdrNoOfs(TEMP_GPR1.XReg));
+ cmp(TEMP_GPR2.WReg, cmpValReg);
+ bne(notEqual); // mismatch: skip the store, flags stay "not equal"
+ stlxr(TEMP_GPR2.WReg, valReg, AdrNoOfs(TEMP_GPR1.XReg)); // TEMP_GPR2 now receives the exclusive-store status
+ cbnz(TEMP_GPR2.WReg, storeFailed); // non-zero status: retry from the load
+
+ L(notEqual);
+ cset(outReg, Cond::EQ); // uses the flags of the cmp above; the instructions in between do not modify them
+ }
+}
+
+// Emits a floating-point load from guest memory into registerData.
+// The value is read through TEMP_GPR2, byte-reversed and then moved into the FP register.
+// SINGLE mode additionally widens the value to double unless flags2.notExpanded is set.
+// Returns false for any other mode without emitting code; true otherwise.
+bool AArch64GenContext_t::fpr_load(IMLInstruction* imlInstruction, bool indexed)
+{
+	const IMLReg& dataReg = imlInstruction->op_storeLoad.registerData;
+	SReg valueSReg = fpReg(dataReg);
+	DReg valueDReg = fpReg(dataReg);
+	WReg addrBaseReg = gpReg(imlInstruction->op_storeLoad.registerMem);
+	WReg indexReg = indexed ? gpReg(imlInstruction->op_storeLoad.registerMem2) : wzr;
+	const sint32 immOffset = imlInstruction->op_storeLoad.immS32;
+	const uint8 mode = imlInstruction->op_storeLoad.mode;
+
+	// effective address (registerMem + immS32 [+ registerMem2]) -> TEMP_GPR1
+	const auto emitEffectiveAddress = [&]() {
+		add_imm(TEMP_GPR1.WReg, addrBaseReg, immOffset, TEMP_GPR1.WReg);
+		if (indexed)
+			add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, indexReg);
+	};
+
+	switch (mode)
+	{
+	case PPCREC_FPR_LD_MODE_SINGLE:
+		emitEffectiveAddress();
+		ldr(TEMP_GPR2.WReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW));
+		rev(TEMP_GPR2.WReg, TEMP_GPR2.WReg);
+		fmov(valueSReg, TEMP_GPR2.WReg);
+		// when notExpanded is set the value is left as a single
+		if (!imlInstruction->op_storeLoad.flags2.notExpanded)
+			fcvt(valueDReg, valueSReg);
+		return true;
+	case PPCREC_FPR_LD_MODE_DOUBLE:
+		emitEffectiveAddress();
+		ldr(TEMP_GPR2.XReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW));
+		rev(TEMP_GPR2.XReg, TEMP_GPR2.XReg);
+		fmov(valueDReg, TEMP_GPR2.XReg);
+		return true;
+	default:
+		return false;
+	}
+}
+
+// store to memory
+// Emits a floating-point store of registerData to guest memory.
+// The effective address is registerMem + immS32 (+ registerMem2 when `indexed`), built in TEMP_GPR1.
+// The value is moved to TEMP_GPR2, byte-reversed and then stored.
+// Modes: SINGLE (narrows from double unless flags2.notExpanded is set), DOUBLE, and UI32_FROM_PS0
+// (stores the low 32 bits of the register unchanged).
+// Returns false, after logging, for any other mode without emitting code; true otherwise.
+bool AArch64GenContext_t::fpr_store(IMLInstruction* imlInstruction, bool indexed)
+{
+	const IMLReg& dataImlReg = imlInstruction->op_storeLoad.registerData;
+	DReg dataDReg = fpReg(dataImlReg);
+	SReg dataSReg = fpReg(dataImlReg);
+	WReg memReg = gpReg(imlInstruction->op_storeLoad.registerMem);
+	WReg indexReg = indexed ? gpReg(imlInstruction->op_storeLoad.registerMem2) : wzr;
+	sint32 memOffset = imlInstruction->op_storeLoad.immS32;
+	uint8 mode = imlInstruction->op_storeLoad.mode;
+
+	if (mode == PPCREC_FPR_ST_MODE_SINGLE)
+	{
+		add_imm(TEMP_GPR1.WReg, memReg, memOffset, TEMP_GPR1.WReg);
+		if (indexed)
+			add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, indexReg);
+
+		if (imlInstruction->op_storeLoad.flags2.notExpanded)
+		{
+			// value is already in single format
+			fmov(TEMP_GPR2.WReg, dataSReg);
+		}
+		else
+		{
+			// narrow to single in TEMP_FPR so the source register keeps its double value
+			fcvt(TEMP_FPR.SReg, dataDReg);
+			fmov(TEMP_GPR2.WReg, TEMP_FPR.SReg);
+		}
+		rev(TEMP_GPR2.WReg, TEMP_GPR2.WReg);
+		str(TEMP_GPR2.WReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW));
+	}
+	else if (mode == PPCREC_FPR_ST_MODE_DOUBLE)
+	{
+		add_imm(TEMP_GPR1.WReg, memReg, memOffset, TEMP_GPR1.WReg);
+		if (indexed)
+			add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, indexReg);
+		fmov(TEMP_GPR2.XReg, dataDReg);
+		rev(TEMP_GPR2.XReg, TEMP_GPR2.XReg);
+		str(TEMP_GPR2.XReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW));
+	}
+	else if (mode == PPCREC_FPR_ST_MODE_UI32_FROM_PS0)
+	{
+		add_imm(TEMP_GPR1.WReg, memReg, memOffset, TEMP_GPR1.WReg);
+		if (indexed)
+			add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, indexReg);
+		fmov(TEMP_GPR2.WReg, dataSReg);
+		rev(TEMP_GPR2.WReg, TEMP_GPR2.WReg);
+		str(TEMP_GPR2.WReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW));
+	}
+	else
+	{
+		// log first so the message is not lost when the assert fires; cemuLog_log takes {}-style placeholders
+		// (see the other log calls in this file), a printf-style %d is never substituted
+		cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_fpr_store(): Unsupported mode {}\n", mode);
+		cemu_assert_suspicious();
+		return false;
+	}
+	return true;
+}
+
+// FPR op FPR
+// Emits a two-operand floating-point operation.
+// The int<->float conversions and the bitcast are handled first because one of their operands is a
+// general-purpose register; every other operation works on the double views of regR and regA.
+// Unsupported operations trigger cemu_assert_suspicious().
+void AArch64GenContext_t::fpr_r_r(IMLInstruction* imlInstruction)
+{
+	auto imlRegR = imlInstruction->op_fpr_r_r.regR;
+	auto imlRegA = imlInstruction->op_fpr_r_r.regA;
+	const auto operation = imlInstruction->operation;
+
+	if (operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT)
+	{
+		fcvtzs(gpReg(imlRegR), fpReg(imlRegA));
+		return;
+	}
+	if (operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT)
+	{
+		scvtf(fpReg(imlRegR), gpReg(imlRegA));
+		return;
+	}
+	if (operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
+	{
+		cemu_assert_debug(imlRegR.GetRegFormat() == IMLRegFormat::F64); // assuming target is always F64 for now
+		// the emitted sequence depends on the source width: 32-bit sources are moved in as a single and
+		// then widened to double, 64-bit sources are moved in directly
+		DReg dstDReg = fpReg(imlRegR);
+		SReg dstSReg = fpReg(imlRegR);
+		const auto srcFormat = imlRegA.GetRegFormat();
+		if (srcFormat == IMLRegFormat::I32)
+		{
+			fmov(dstSReg, gpReg(imlRegA));
+			// float to double
+			fcvt(dstDReg, dstSReg);
+		}
+		else if (srcFormat == IMLRegFormat::I64)
+		{
+			fmov(dstDReg, gpReg(imlRegA));
+		}
+		else
+		{
+			cemu_assert_unimplemented();
+		}
+		return;
+	}
+
+	DReg dstReg = fpReg(imlRegR);
+	DReg srcReg = fpReg(imlRegA);
+
+	switch (operation)
+	{
+	case PPCREC_IML_OP_FPR_ASSIGN:
+		fmov(dstReg, srcReg);
+		break;
+	case PPCREC_IML_OP_FPR_MULTIPLY:
+		fmul(dstReg, dstReg, srcReg);
+		break;
+	case PPCREC_IML_OP_FPR_DIVIDE:
+		fdiv(dstReg, dstReg, srcReg);
+		break;
+	case PPCREC_IML_OP_FPR_ADD:
+		fadd(dstReg, dstReg, srcReg);
+		break;
+	case PPCREC_IML_OP_FPR_SUB:
+		fsub(dstReg, dstReg, srcReg);
+		break;
+	case PPCREC_IML_OP_FPR_FCTIWZ:
+		fcvtzs(dstReg, srcReg);
+		break;
+	default:
+		cemu_assert_suspicious();
+		break;
+	}
+}
+
+// Emits regR = regA <op> regB on double registers (multiply, add, subtract).
+// Unsupported operations trigger cemu_assert_suspicious().
+void AArch64GenContext_t::fpr_r_r_r(IMLInstruction* imlInstruction)
+{
+	DReg dstReg = fpReg(imlInstruction->op_fpr_r_r_r.regR);
+	DReg lhsReg = fpReg(imlInstruction->op_fpr_r_r_r.regA);
+	DReg rhsReg = fpReg(imlInstruction->op_fpr_r_r_r.regB);
+
+	switch (imlInstruction->operation)
+	{
+	case PPCREC_IML_OP_FPR_MULTIPLY:
+		fmul(dstReg, lhsReg, rhsReg);
+		break;
+	case PPCREC_IML_OP_FPR_ADD:
+		fadd(dstReg, lhsReg, rhsReg);
+		break;
+	case PPCREC_IML_OP_FPR_SUB:
+		fsub(dstReg, lhsReg, rhsReg);
+		break;
+	default:
+		cemu_assert_suspicious();
+		break;
+	}
+}
+
+/*
+ * FPR = op (fprA, fprB, fprC)
+ */
+// Emits the three-operand floating-point select: regA is compared with 0.0 and regR receives regC when the
+// GE condition holds, regB otherwise. Any other operation triggers cemu_assert_suspicious().
+void AArch64GenContext_t::fpr_r_r_r_r(IMLInstruction* imlInstruction)
+{
+	DReg dstReg = fpReg(imlInstruction->op_fpr_r_r_r_r.regR);
+	DReg selectorReg = fpReg(imlInstruction->op_fpr_r_r_r_r.regA);
+	DReg otherwiseReg = fpReg(imlInstruction->op_fpr_r_r_r_r.regB);
+	DReg whenGeReg = fpReg(imlInstruction->op_fpr_r_r_r_r.regC);
+
+	if (imlInstruction->operation != PPCREC_IML_OP_FPR_SELECT)
+	{
+		cemu_assert_suspicious();
+		return;
+	}
+
+	fcmp(selectorReg, 0.0);
+	fcsel(dstReg, whenGeReg, otherwiseReg, Cond::GE);
+}
+
+ /*
+  * FPR = op (FPR)
+  * In-place unary operations on a single floating point register.
+  */
+ void AArch64GenContext_t::fpr_r(IMLInstruction* imlInstruction)
+ {
+ 	// the same physical register, viewed as 64bit (D) and 32bit (S)
+ 	DReg asDouble = fpReg(imlInstruction->op_fpr_r.regR);
+ 	SReg asSingle = fpReg(imlInstruction->op_fpr_r.regR);
+
+ 	switch (imlInstruction->operation)
+ 	{
+ 	case PPCREC_IML_OP_FPR_NEGATE:
+ 		fneg(asDouble, asDouble);
+ 		break;
+ 	case PPCREC_IML_OP_FPR_LOAD_ONE:
+ 		fmov(asDouble, 1.0);
+ 		break;
+ 	case PPCREC_IML_OP_FPR_ABS:
+ 		fabs(asDouble, asDouble);
+ 		break;
+ 	case PPCREC_IML_OP_FPR_NEGATIVE_ABS:
+ 		// -|x|
+ 		fabs(asDouble, asDouble);
+ 		fneg(asDouble, asDouble);
+ 		break;
+ 	case PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM:
+ 		// narrow to 32bit single, then widen back to 64bit double
+ 		fcvt(asSingle, asDouble);
+ 		fcvt(asDouble, asSingle);
+ 		break;
+ 	case PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64:
+ 		// widen the 32bit value in the bottom of the register to 64bit double
+ 		fcvt(asDouble, asSingle);
+ 		break;
+ 	default:
+ 		cemu_assert_unimplemented();
+ 		break;
+ 	}
+ }
+
+ /*
+  * Translate an IML floating point condition into the AArch64 condition code
+  * that is evaluated on the flags produced by a preceding FCMP.
+  * Unknown conditions trigger cemu_assert_suspicious and fall back to EQ.
+  */
+ Cond ImlFPCondToArm64Cond(IMLCondition cond)
+ {
+ 	if (cond == IMLCondition::UNORDERED_GT)
+ 		return Cond::GT;
+ 	if (cond == IMLCondition::UNORDERED_LT)
+ 		return Cond::MI; // MI rather than LT: LT would also be taken for an unordered compare result
+ 	if (cond == IMLCondition::UNORDERED_EQ)
+ 		return Cond::EQ;
+ 	if (cond == IMLCondition::UNORDERED_U)
+ 		return Cond::VS; // FCMP sets V only when the operands are unordered
+
+ 	cemu_assert_suspicious();
+ 	return Cond::EQ;
+ }
+
+ /*
+  * GPR = (fprA cond fprB) ? 1 : 0
+  */
+ void AArch64GenContext_t::fpr_compare(IMLInstruction* imlInstruction)
+ {
+ 	const auto& operands = imlInstruction->op_fpr_compare;
+ 	WReg boolResult = gpReg(operands.regR);
+ 	DReg lhs = fpReg(operands.regA);
+ 	DReg rhs = fpReg(operands.regB);
+ 	const auto armCond = ImlFPCondToArm64Cond(operands.cond);
+
+ 	// FCMP updates the flags, CSET materializes the condition as 0/1
+ 	fcmp(lhs, rhs);
+ 	cset(boolResult, armCond);
+ }
+
+ /*
+  * Call the absolute address stored in op_call_imm.callAddress.
+  * x30 is pushed before the call and popped afterwards because BLR overwrites it.
+  */
+ void AArch64GenContext_t::call_imm(IMLInstruction* imlInstruction)
+ {
+ 	const auto& targetReg = TEMP_GPR1.XReg;
+
+ 	// push x30 (sp moves by 16 bytes)
+ 	str(x30, AdrPreImm(sp, -16));
+ 	// load the target address and call it
+ 	mov(targetReg, imlInstruction->op_call_imm.callAddress);
+ 	blr(targetReg);
+ 	// pop x30
+ 	ldr(x30, AdrPostImm(sp, 16));
+ }
+
+ /*
+  * Generate AArch64 code for all IML segments of ppcImlGenContext.
+  * On success the code pointer and size are stored in PPCRecFunction (x86Code / x86Size)
+  * and the buffer is detached from the local allocator so that it is not freed on return.
+  * Returns false if any instruction could not be emitted or if jump processing failed.
+  */
+ bool PPCRecompiler_generateAArch64Code(struct PPCRecFunction_t* PPCRecFunction, struct ppcImlGenContext_t* ppcImlGenContext)
+ {
+ 	AArch64Allocator allocator;
+ 	AArch64GenContext_t aarch64GenContext{&allocator};
+
+ 	// generate iml instruction code
+ 	bool codeGenerationFailed = false;
+ 	for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
+ 	{
+ 		// a failure is only acted upon at segment granularity
+ 		if (codeGenerationFailed)
+ 			break;
+ 		segIt->x64Offset = aarch64GenContext.getSize();
+
+ 		aarch64GenContext.storeSegmentStart(segIt);
+
+ 		for (size_t i = 0; i < segIt->imlList.size(); i++)
+ 		{
+ 			IMLInstruction* imlInstruction = &segIt->imlList[i];
+ 			switch (imlInstruction->type)
+ 			{
+ 			case PPCREC_IML_TYPE_R_NAME:
+ 				aarch64GenContext.r_name(imlInstruction);
+ 				break;
+ 			case PPCREC_IML_TYPE_NAME_R:
+ 				aarch64GenContext.name_r(imlInstruction);
+ 				break;
+ 			case PPCREC_IML_TYPE_R_R:
+ 				codeGenerationFailed |= !aarch64GenContext.r_r(imlInstruction);
+ 				break;
+ 			case PPCREC_IML_TYPE_R_S32:
+ 				codeGenerationFailed |= !aarch64GenContext.r_s32(imlInstruction);
+ 				break;
+ 			case PPCREC_IML_TYPE_R_R_S32:
+ 				codeGenerationFailed |= !aarch64GenContext.r_r_s32(imlInstruction);
+ 				break;
+ 			case PPCREC_IML_TYPE_R_R_S32_CARRY:
+ 				codeGenerationFailed |= !aarch64GenContext.r_r_s32_carry(imlInstruction);
+ 				break;
+ 			case PPCREC_IML_TYPE_R_R_R:
+ 				codeGenerationFailed |= !aarch64GenContext.r_r_r(imlInstruction);
+ 				break;
+ 			case PPCREC_IML_TYPE_R_R_R_CARRY:
+ 				codeGenerationFailed |= !aarch64GenContext.r_r_r_carry(imlInstruction);
+ 				break;
+ 			case PPCREC_IML_TYPE_COMPARE:
+ 				aarch64GenContext.compare(imlInstruction);
+ 				break;
+ 			case PPCREC_IML_TYPE_COMPARE_S32:
+ 				aarch64GenContext.compare_s32(imlInstruction);
+ 				break;
+ 			case PPCREC_IML_TYPE_CONDITIONAL_JUMP:
+ 				aarch64GenContext.cjump(imlInstruction, segIt);
+ 				break;
+ 			case PPCREC_IML_TYPE_JUMP:
+ 				aarch64GenContext.jump(segIt);
+ 				break;
+ 			case PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK:
+ 				aarch64GenContext.conditionalJumpCycleCheck(segIt);
+ 				break;
+ 			case PPCREC_IML_TYPE_MACRO:
+ 				codeGenerationFailed |= !aarch64GenContext.macro(imlInstruction);
+ 				break;
+ 			case PPCREC_IML_TYPE_LOAD:
+ 				codeGenerationFailed |= !aarch64GenContext.load(imlInstruction, false);
+ 				break;
+ 			case PPCREC_IML_TYPE_LOAD_INDEXED:
+ 				codeGenerationFailed |= !aarch64GenContext.load(imlInstruction, true);
+ 				break;
+ 			case PPCREC_IML_TYPE_STORE:
+ 				codeGenerationFailed |= !aarch64GenContext.store(imlInstruction, false);
+ 				break;
+ 			case PPCREC_IML_TYPE_STORE_INDEXED:
+ 				codeGenerationFailed |= !aarch64GenContext.store(imlInstruction, true);
+ 				break;
+ 			case PPCREC_IML_TYPE_ATOMIC_CMP_STORE:
+ 				aarch64GenContext.atomic_cmp_store(imlInstruction);
+ 				break;
+ 			case PPCREC_IML_TYPE_CALL_IMM:
+ 				aarch64GenContext.call_imm(imlInstruction);
+ 				break;
+ 			case PPCREC_IML_TYPE_NO_OP:
+ 				// no op
+ 				break;
+ 			case PPCREC_IML_TYPE_FPR_LOAD:
+ 				codeGenerationFailed |= !aarch64GenContext.fpr_load(imlInstruction, false);
+ 				break;
+ 			case PPCREC_IML_TYPE_FPR_LOAD_INDEXED:
+ 				codeGenerationFailed |= !aarch64GenContext.fpr_load(imlInstruction, true);
+ 				break;
+ 			case PPCREC_IML_TYPE_FPR_STORE:
+ 				codeGenerationFailed |= !aarch64GenContext.fpr_store(imlInstruction, false);
+ 				break;
+ 			case PPCREC_IML_TYPE_FPR_STORE_INDEXED:
+ 				codeGenerationFailed |= !aarch64GenContext.fpr_store(imlInstruction, true);
+ 				break;
+ 			case PPCREC_IML_TYPE_FPR_R_R:
+ 				aarch64GenContext.fpr_r_r(imlInstruction);
+ 				break;
+ 			case PPCREC_IML_TYPE_FPR_R_R_R:
+ 				aarch64GenContext.fpr_r_r_r(imlInstruction);
+ 				break;
+ 			case PPCREC_IML_TYPE_FPR_R_R_R_R:
+ 				aarch64GenContext.fpr_r_r_r_r(imlInstruction);
+ 				break;
+ 			case PPCREC_IML_TYPE_FPR_R:
+ 				aarch64GenContext.fpr_r(imlInstruction);
+ 				break;
+ 			case PPCREC_IML_TYPE_FPR_COMPARE:
+ 				aarch64GenContext.fpr_compare(imlInstruction);
+ 				break;
+ 			default:
+ 				codeGenerationFailed = true;
+ 				cemu_assert_suspicious();
+ 				cemuLog_log(LogType::Recompiler, "PPCRecompiler_generateAArch64Code(): Unsupported iml type {}", imlInstruction->type);
+ 				break;
+ 			}
+ 		}
+ 	}
+
+ 	// handle failed code generation
+ 	if (codeGenerationFailed)
+ 		return false;
+
+ 	const bool jumpsResolved = aarch64GenContext.processAllJumps();
+ 	if (!jumpsResolved)
+ 	{
+ 		cemuLog_log(LogType::Recompiler, "PPCRecompiler_generateAArch64Code(): some jumps exceeded the +/-128MB offset.");
+ 		return false;
+ 	}
+
+ 	aarch64GenContext.readyRE();
+
+ 	// set code
+ 	PPCRecFunction->x86Code = aarch64GenContext.getCode();
+ 	PPCRecFunction->x86Size = aarch64GenContext.getMaxSize();
+ 	// set free disabled to skip freeing the code from the CodeGenerator destructor
+ 	allocator.setFreeDisabled(true);
+ 	return true;
+ }
+
+ /*
+  * Release a code buffer that was produced by PPCRecompiler_generateAArch64Code.
+  * code: start of the buffer
+  * size: size of the buffer in bytes, used when changing the page protection
+  */
+ void PPCRecompiler_cleanupAArch64Code(void* code, size_t size)
+ {
+ 	AArch64Allocator allocator;
+ 	// make the pages writable again before handing them back
+ 	// (presumably they were left read+execute by readyRE() - confirm against AArch64Allocator)
+ 	if (allocator.useProtect())
+ 		CodeArray::protect(code, size, CodeArray::PROTECT_RW);
+ 	// the allocator interface works on pointers to 32bit instruction words, so the cast needs an explicit target type
+ 	allocator.free(static_cast<uint32*>(code));
+ }
+
+ /*
+ * Emits the trampoline used to enter recompiled code.
+ * The emitted code expects the recompiled function address in x0 and the value for HCPU_REG in x1.
+ * It saves x19..x30 and the low 64 bits of v8..v15 on the stack, sets up the reserved registers,
+ * calls x0, then restores everything and returns.
+ */
+ void AArch64GenContext_t::enterRecompilerCode()
+ {
+ // 12 general purpose registers * 8 bytes + 8 vector lanes * 8 bytes = 160 bytes
+ constexpr size_t STACK_SIZE = 160 /* x19 .. x30 + v8.d[0] .. v15.d[0] */;
+ static_assert(STACK_SIZE % 16 == 0);
+ sub(sp, sp, STACK_SIZE);
+ // x9 is used as a moving write cursor so that sp itself stays fixed
+ mov(x9, sp);
+
+ // offsets 0..95: general purpose registers, stored pairwise
+ stp(x19, x20, AdrPostImm(x9, 16));
+ stp(x21, x22, AdrPostImm(x9, 16));
+ stp(x23, x24, AdrPostImm(x9, 16));
+ stp(x25, x26, AdrPostImm(x9, 16));
+ stp(x27, x28, AdrPostImm(x9, 16));
+ stp(x29, x30, AdrPostImm(x9, 16));
+ // offsets 96..159: lane 0 of v8..v15, four registers (32 bytes) per store
+ st4((v8.d - v11.d)[0], AdrPostImm(x9, 32));
+ st4((v12.d - v15.d)[0], AdrPostImm(x9, 32));
+ mov(HCPU_REG, x1); // call argument 2
+ mov(PPC_REC_INSTANCE_REG, (uint64)ppcRecompilerInstanceData);
+ mov(MEM_BASE_REG, (uint64)memory_base);
+
+ // branch to recFunc
+ blr(x0); // call argument 1
+
+ // restore in the same order and layout as the stores above
+ mov(x9, sp);
+ ldp(x19, x20, AdrPostImm(x9, 16));
+ ldp(x21, x22, AdrPostImm(x9, 16));
+ ldp(x23, x24, AdrPostImm(x9, 16));
+ ldp(x25, x26, AdrPostImm(x9, 16));
+ ldp(x27, x28, AdrPostImm(x9, 16));
+ ldp(x29, x30, AdrPostImm(x9, 16));
+ ld4((v8.d - v11.d)[0], AdrPostImm(x9, 32));
+ ld4((v12.d - v15.d)[0], AdrPostImm(x9, 32));
+
+ add(sp, sp, STACK_SIZE);
+
+ ret();
+ }
+
+ /*
+ * Emits the stub used to leave recompiled code: stores the 32bit view of LR into
+ * hCPU->instructionPointer and returns.
+ * NOTE(review): LR here is a project-defined register alias, presumably holding the PPC address
+ * to resume at - confirm against its definition.
+ */
+ void AArch64GenContext_t::leaveRecompilerCode()
+ {
+ str(LR.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, instructionPointer)));
+ ret();
+ }
+
+ // set once the enter/leave stubs below have been generated
+ bool initializedInterfaceFunctions = false;
+ // code generator holding the stub used to enter recompiled code
+ AArch64GenContext_t enterRecompilerCode_ctx{};
+
+ // code generators holding the two leave stubs (one per exported function pointer)
+ AArch64GenContext_t leaveRecompilerCode_unvisited_ctx{};
+ AArch64GenContext_t leaveRecompilerCode_visited_ctx{};
+ /*
+  * Generate the enter/leave stubs and publish their addresses through
+  * PPCRecompiler_enterRecompilerCode, PPCRecompiler_leaveRecompilerCode_unvisited and
+  * PPCRecompiler_leaveRecompilerCode_visited. Only the first call does any work.
+  */
+ void PPCRecompilerAArch64Gen_generateRecompilerInterfaceFunctions()
+ {
+ 	if (!initializedInterfaceFunctions)
+ 	{
+ 		initializedInterfaceFunctions = true;
+
+ 		// entry trampoline
+ 		enterRecompilerCode_ctx.enterRecompilerCode();
+ 		enterRecompilerCode_ctx.readyRE();
+ 		PPCRecompiler_enterRecompilerCode = enterRecompilerCode_ctx.getCode();
+
+ 		// both leave stubs are emitted by the same generator function but live in separate buffers
+ 		leaveRecompilerCode_unvisited_ctx.leaveRecompilerCode();
+ 		leaveRecompilerCode_unvisited_ctx.readyRE();
+ 		PPCRecompiler_leaveRecompilerCode_unvisited = leaveRecompilerCode_unvisited_ctx.getCode();
+
+ 		leaveRecompilerCode_visited_ctx.leaveRecompilerCode();
+ 		leaveRecompilerCode_visited_ctx.readyRE();
+ 		PPCRecompiler_leaveRecompilerCode_visited = leaveRecompilerCode_visited_ctx.getCode();
+ 	}
+ }
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h b/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h
new file mode 100644
index 00000000..b610ee04
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "HW/Espresso/Recompiler/IML/IMLInstruction.h"
+#include "../PPCRecompiler.h"
+
+ // Generates AArch64 code for the IML segments of ppcImlGenContext and stores the result in PPCRecFunction.
+ // Returns false if code generation failed.
+ bool PPCRecompiler_generateAArch64Code(struct PPCRecFunction_t* PPCRecFunction, struct ppcImlGenContext_t* ppcImlGenContext);
+ // Frees a code buffer of the given size that was produced by PPCRecompiler_generateAArch64Code.
+ void PPCRecompiler_cleanupAArch64Code(void* code, size_t size);
+
+ // Generates the stubs used to enter and leave recompiled code. Repeated calls are no-ops.
+ void PPCRecompilerAArch64Gen_generateRecompilerInterfaceFunctions();
+
+ // architecture specific constants
+ // physical register IDs are laid out as [GPRs][FPRs]
+ namespace IMLArchAArch64
+ {
+ 	// general purpose registers
+ 	constexpr int PHYSREG_GPR_BASE = 0;
+ 	constexpr int PHYSREG_GPR_COUNT = 25;
+ 	// floating point registers start right after the GPR range
+ 	constexpr int PHYSREG_FPR_BASE = PHYSREG_GPR_BASE + PHYSREG_GPR_COUNT;
+ 	constexpr int PHYSREG_FPR_COUNT = 31;
+ } // namespace IMLArchAArch64
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
new file mode 100644
index 00000000..eadb80fb
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
@@ -0,0 +1,1672 @@
+#include "Cafe/HW/Espresso/PPCState.h"
+#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
+#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterHelper.h"
+#include "../PPCRecompiler.h"
+#include "../PPCRecompilerIml.h"
+#include "BackendX64.h"
+#include "Cafe/OS/libs/coreinit/coreinit_Time.h"
+#include "util/MemMapper/MemMapper.h"
+#include "Common/cpu_features.h"
+#include
+
+ // Convert a physical I32 IML register to the assembler's 32bit GPR enum
+ static x86Assembler64::GPR32 _reg32(IMLReg physReg)
+ {
+ 	cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I32);
+ 	const IMLRegID gprIndex = physReg.GetRegID();
+ 	cemu_assert_debug(gprIndex < 16);
+ 	return static_cast<x86Assembler64::GPR32>(gprIndex);
+ }
+
+ // Convert a physical I64 IML register to its x86-64 GPR index
+ static uint32 _reg64(IMLReg physReg)
+ {
+ 	cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I64);
+ 	const IMLRegID gprIndex = physReg.GetRegID();
+ 	cemu_assert_debug(gprIndex < 16);
+ 	return gprIndex;
+ }
+
+ // Convert a physical F64 IML register to its zero-based FPR index by removing the FPR base offset
+ uint32 _regF64(IMLReg physReg)
+ {
+ 	cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::F64);
+ 	const IMLRegID physId = physReg.GetRegID();
+ 	cemu_assert_debug(physId >= IMLArchX86::PHYSREG_FPR_BASE && physId < IMLArchX86::PHYSREG_FPR_BASE+16);
+ 	// keep the intermediate result in IMLRegID width, like an in-place subtraction would
+ 	return static_cast<IMLRegID>(physId - IMLArchX86::PHYSREG_FPR_BASE);
+ }
+
+ // Convert a physical IML register to the assembler's 8bit (REX) GPR enum
+ static x86Assembler64::GPR8_REX _reg8(IMLReg physReg)
+ {
+ 	// for now these are represented as 32bit
+ 	cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I32);
+ 	return static_cast<x86Assembler64::GPR8_REX>(physReg.GetRegID());
+ }
+
+ // Reinterpret an 8bit register id as the 32bit register with the same index
+ static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId)
+ {
+ 	return static_cast<x86Assembler64::GPR32>(regId);
+ }
+
+ // Reinterpret a 32bit register id as the 8bit register with the same index
+ static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId)
+ {
+ 	return static_cast<x86Assembler64::GPR8_REX>(regId);
+ }
+
+ // Reinterpret a 64bit register index as the 8bit register with the same index
+ static x86Assembler64::GPR8_REX _reg8_from_reg64(uint32 regId)
+ {
+ 	return static_cast<x86Assembler64::GPR8_REX>(regId);
+ }
+
+ // Reinterpret a 32bit register id as the 64bit register with the same index
+ static x86Assembler64::GPR64 _reg64_from_reg32(x86Assembler64::GPR32 regId)
+ {
+ 	return static_cast<x86Assembler64::GPR64>(regId);
+ }
+
+ // Map an IML integer condition to the x86 condition code that is true when the condition holds.
+ // Unsupported conditions trigger cemu_assert_suspicious and fall back to X86_CONDITION_Z.
+ X86Cond _x86Cond(IMLCondition imlCond)
+ {
+ 	switch (imlCond)
+ 	{
+ 	case IMLCondition::EQ:
+ 		return X86_CONDITION_Z;
+ 	case IMLCondition::NEQ:
+ 		return X86_CONDITION_NZ;
+ 	case IMLCondition::UNSIGNED_GT:
+ 		return X86_CONDITION_NBE;
+ 	case IMLCondition::UNSIGNED_LT:
+ 		return X86_CONDITION_B;
+ 	case IMLCondition::SIGNED_GT:
+ 		return X86_CONDITION_NLE;
+ 	case IMLCondition::SIGNED_LT:
+ 		return X86_CONDITION_L;
+ 	default:
+ 		cemu_assert_suspicious();
+ 		return X86_CONDITION_Z;
+ 	}
+ }
+
+ // Map an IML integer condition to the x86 condition code that is true when the condition does NOT hold.
+ // Unsupported conditions trigger cemu_assert_suspicious and fall back to X86_CONDITION_Z.
+ X86Cond _x86CondInverted(IMLCondition imlCond)
+ {
+ 	switch (imlCond)
+ 	{
+ 	case IMLCondition::EQ:
+ 		return X86_CONDITION_NZ;
+ 	case IMLCondition::NEQ:
+ 		return X86_CONDITION_Z;
+ 	case IMLCondition::UNSIGNED_GT:
+ 		return X86_CONDITION_BE;
+ 	case IMLCondition::UNSIGNED_LT:
+ 		return X86_CONDITION_NB;
+ 	case IMLCondition::SIGNED_GT:
+ 		return X86_CONDITION_LE;
+ 	case IMLCondition::SIGNED_LT:
+ 		return X86_CONDITION_NL;
+ 	default:
+ 		cemu_assert_suspicious();
+ 		return X86_CONDITION_Z;
+ 	}
+ }
+
+ // Select the direct or the inverted x86 condition code for an IML condition
+ X86Cond _x86Cond(IMLCondition imlCond, bool condIsInverted)
+ {
+ 	return condIsInverted ? _x86CondInverted(imlCond) : _x86Cond(imlCond);
+ }
+
+/*
+* Remember current instruction output offset for reloc
+* The instruction generated after this method has been called will be adjusted
+*/
+void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContext, void* extraInfo = nullptr)
+{
+ x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->emitter->GetWriteIndex(), extraInfo);
+}
+
+ /*
+  * Patch the relative displacement of an already emitted jump so that it targets destinationOffset.
+  * jumpInstructionOffset: buffer offset of the first byte of the jump instruction
+  * destinationOffset: buffer offset of the jump target
+  * Recognized encodings: 0F 8x rel32, 7x rel8, E9 rel32, EB rel8. Anything else hits assert_dbg.
+  */
+ void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset)
+ {
+ 	uint8* const jumpCode = x64GenContext->emitter->GetBufferPtr() + jumpInstructionOffset;
+ 	const uint8 opcode = jumpCode[0];
+
+ 	if (opcode == 0x0F && (jumpCode[1] >= 0x80 && jumpCode[1] <= 0x8F))
+ 	{
+ 		// far conditional jump (6 bytes, displacement is relative to the end of the instruction)
+ 		*(uint32*)(jumpCode + 2) = (destinationOffset - (jumpInstructionOffset + 6));
+ 		return;
+ 	}
+ 	if (opcode == 0xE9)
+ 	{
+ 		// unconditional jump with 32bit displacement (5 bytes)
+ 		*(uint32*)(jumpCode + 1) = (destinationOffset - (jumpInstructionOffset + 5));
+ 		return;
+ 	}
+ 	if ((opcode >= 0x70 && opcode <= 0x7F) || opcode == 0xEB)
+ 	{
+ 		// short conditional / unconditional jump (2 bytes, 8bit displacement)
+ 		const sint32 rel8 = (sint32)((destinationOffset - (jumpInstructionOffset + 2)));
+ 		cemu_assert_debug(rel8 >= -128 && rel8 <= 127);
+ 		*(uint8*)(jumpCode + 1) = (uint8)rel8;
+ 		return;
+ 	}
+ 	assert_dbg();
+ }
+
+ /*
+  * Helper called from recompiled code to run an HLE function.
+  * Returns the interpreter instance the generated code should continue with.
+  */
+ void* ATTR_MS_ABI PPCRecompiler_virtualHLE(PPCInterpreter_t* hCPU, uint32 hleFuncId)
+ {
+ 	void* savedRspTemp = hCPU->rspTemp;
+ 	if (hleFuncId == 0xFFD0)
+ 	{
+ 		// special id: no HLE function is called, r3 is cleared and execution moves on
+ 		hCPU->remainingCycles -= 500; // let subtract about 500 cycles for each HLE call
+ 		hCPU->gpr[3] = 0;
+ 		PPCInterpreter_nextInstruction(hCPU);
+ 		return hCPU;
+ 	}
+
+ 	auto hleCall = PPCInterpreter_getHLECall(hleFuncId);
+ 	cemu_assert(hleCall != nullptr);
+ 	hleCall(hCPU);
+
+ 	// put back the rspTemp value that was current on entry
+ 	hCPU->rspTemp = savedRspTemp;
+ 	return PPCInterpreter_getCurrentInstance();
+ }
+
+ /*
+  * Emit x64 code for an IML macro instruction (branches out of the function, leave, HLE call, cycle counting, debug break).
+  * Returns true if the macro operation was emitted, false for an unknown operation.
+  *
+  * Jumps through the direct jump table are emitted as raw bytes:
+  *   41 FF A4 57 disp32  ->  JMP [R15 + RDX*2 + disp32]
+  *   41 FF A7 disp32     ->  JMP [R15 + disp32]
+  */
+ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+ {
+ 	if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG)
+ 	{
+ 		//x64Gen_int3(x64GenContext);
+ 		uint32 branchDstReg = _reg32(imlInstruction->op_macro.paramReg);
+ 		if(X86_REG_RDX != branchDstReg)
+ 			x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RDX, branchDstReg);
+ 		// potential optimization: Use branchDstReg directly if possible instead of moving to RDX/EDX
+ 		// JMP [offset+RDX*(8/4)+R15]
+ 		x64Gen_writeU8(x64GenContext, 0x41);
+ 		x64Gen_writeU8(x64GenContext, 0xFF);
+ 		x64Gen_writeU8(x64GenContext, 0xA4);
+ 		x64Gen_writeU8(x64GenContext, 0x57);
+ 		x64Gen_writeU32(x64GenContext, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
+ 		return true;
+ 	}
+ 	else if( imlInstruction->operation == PPCREC_IML_MACRO_BL )
+ 	{
+ 		// MOV DWORD [SPR_LinkRegister], newLR
+ 		uint32 newLR = imlInstruction->op_macro.param + 4;
+ 		x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.LR), newLR);
+ 		// remember new instruction pointer in RDX
+ 		uint32 newIP = imlInstruction->op_macro.param2;
+ 		x64Gen_mov_reg64Low32_imm32(x64GenContext, X86_REG_RDX, newIP);
+ 		// since RDX is constant we can use JMP [R15+const_offset] if jumpTableOffset+RDX*2 does not exceed the 2GB boundary
+ 		uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL;
+ 		if (lookupOffset >= 0x80000000ULL)
+ 		{
+ 			// JMP [offset+RDX*(8/4)+R15]
+ 			x64Gen_writeU8(x64GenContext, 0x41);
+ 			x64Gen_writeU8(x64GenContext, 0xFF);
+ 			x64Gen_writeU8(x64GenContext, 0xA4);
+ 			x64Gen_writeU8(x64GenContext, 0x57);
+ 			x64Gen_writeU32(x64GenContext, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
+ 		}
+ 		else
+ 		{
+ 			// JMP [R15+lookupOffset]
+ 			x64Gen_writeU8(x64GenContext, 0x41);
+ 			x64Gen_writeU8(x64GenContext, 0xFF);
+ 			x64Gen_writeU8(x64GenContext, 0xA7);
+ 			x64Gen_writeU32(x64GenContext, (uint32)lookupOffset);
+ 		}
+ 		return true;
+ 	}
+ 	else if( imlInstruction->operation == PPCREC_IML_MACRO_B_FAR )
+ 	{
+ 		// remember new instruction pointer in RDX
+ 		uint32 newIP = imlInstruction->op_macro.param2;
+ 		x64Gen_mov_reg64Low32_imm32(x64GenContext, X86_REG_RDX, newIP);
+ 		// Since RDX is constant we can use JMP [R15+const_offset] if jumpTableOffset+RDX*2 does not exceed the 2GB boundary
+ 		uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL;
+ 		if (lookupOffset >= 0x80000000ULL)
+ 		{
+ 			// JMP [offset+RDX*(8/4)+R15]
+ 			x64Gen_writeU8(x64GenContext, 0x41);
+ 			x64Gen_writeU8(x64GenContext, 0xFF);
+ 			x64Gen_writeU8(x64GenContext, 0xA4);
+ 			x64Gen_writeU8(x64GenContext, 0x57);
+ 			x64Gen_writeU32(x64GenContext, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
+ 		}
+ 		else
+ 		{
+ 			// JMP [R15+lookupOffset]
+ 			x64Gen_writeU8(x64GenContext, 0x41);
+ 			x64Gen_writeU8(x64GenContext, 0xFF);
+ 			x64Gen_writeU8(x64GenContext, 0xA7);
+ 			x64Gen_writeU32(x64GenContext, (uint32)lookupOffset);
+ 		}
+ 		return true;
+ 	}
+ 	else if( imlInstruction->operation == PPCREC_IML_MACRO_LEAVE )
+ 	{
+ 		uint32 currentInstructionAddress = imlInstruction->op_macro.param;
+ 		// remember PC value in REG_EDX
+ 		x64Gen_mov_reg64Low32_imm32(x64GenContext, X86_REG_RDX, currentInstructionAddress);
+
+ 		uint32 newIP = 0; // special value for recompiler exit
+ 		// offsetof instead of taking a member address through a null pointer (same value, no undefined behaviour)
+ 		uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL;
+ 		// JMP [R15+offset]
+ 		x64Gen_writeU8(x64GenContext, 0x41);
+ 		x64Gen_writeU8(x64GenContext, 0xFF);
+ 		x64Gen_writeU8(x64GenContext, 0xA7);
+ 		x64Gen_writeU32(x64GenContext, (uint32)lookupOffset);
+ 		return true;
+ 	}
+ 	else if( imlInstruction->operation == PPCREC_IML_MACRO_DEBUGBREAK )
+ 	{
+ 		x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, imlInstruction->op_macro.param2);
+ 		x64Gen_int3(x64GenContext);
+ 		return true;
+ 	}
+ 	else if( imlInstruction->operation == PPCREC_IML_MACRO_COUNT_CYCLES )
+ 	{
+ 		uint32 cycleCount = imlInstruction->op_macro.param;
+ 		x64Gen_sub_mem32reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, remainingCycles), cycleCount);
+ 		return true;
+ 	}
+ 	else if( imlInstruction->operation == PPCREC_IML_MACRO_HLE )
+ 	{
+ 		uint32 ppcAddress = imlInstruction->op_macro.param;
+ 		uint32 funcId = imlInstruction->op_macro.param2;
+ 		// update instruction pointer
+ 		x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer), ppcAddress);
+ 		// set parameters
+ 		x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RCX, REG_RESV_HCPU);
+ 		x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RDX, funcId);
+ 		// restore stackpointer from hCPU->rspTemp
+ 		x64Emit_mov_reg64_mem64(x64GenContext, X86_REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp));
+ 		// reserve space on stack for call parameters
+ 		x64Gen_sub_reg64_imm32(x64GenContext, X86_REG_RSP, 8*11); // must be uneven number in order to retain stack 0x10 alignment
+ 		x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RBP, 0);
+ 		// call HLE function
+ 		x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RAX, (uint64)PPCRecompiler_virtualHLE);
+ 		x64Gen_call_reg64(x64GenContext, X86_REG_RAX);
+ 		// reload the hCPU register from RAX (result of PPCRecompiler_virtualHLE)
+ 		x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_HCPU, X86_REG_RAX);
+ 		// MOV R15, ppcRecompilerInstanceData
+ 		x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_RECDATA, (uint64)ppcRecompilerInstanceData);
+ 		// MOV R13, memory_base
+ 		x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_MEMBASE, (uint64)memory_base);
+ 		// check if cycles were decreased beyond zero, if yes -> leave recompiler
+ 		x64Gen_bt_mem8(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative
+ 		// sign bit clear -> skip the leave sequence; the jump target is patched in below
+ 		sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
+ 		x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_CARRY, 0);
+
+ 		x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_RDX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer));
+ 		// set EAX to 0 (we assume that ppcRecompilerDirectJumpTable[0] will be a recompiler escape function)
+ 		x64Gen_xor_reg32_reg32(x64GenContext, X86_REG_RAX, X86_REG_RAX);
+ 		// ADD RAX, REG_RESV_RECDATA
+ 		x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, REG_RESV_RECDATA);
+ 		// JMP [recompilerCallTable+EAX/4*8]
+ 		x64Gen_jmp_memReg64(x64GenContext, X86_REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
+ 		PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
+ 		// check if instruction pointer was changed
+ 		// assign new instruction pointer to EAX
+ 		x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_RAX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer));
+ 		// remember instruction pointer in REG_EDX
+ 		x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RDX, X86_REG_RAX);
+ 		// EAX *= 2
+ 		x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, X86_REG_RAX);
+ 		// ADD RAX, REG_RESV_RECDATA
+ 		x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, REG_RESV_RECDATA);
+ 		// JMP [ppcRecompilerDirectJumpTable+RAX/4*8]
+ 		x64Gen_jmp_memReg64(x64GenContext, X86_REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
+ 		return true;
+ 	}
+ 	else
+ 	{
+ 		debug_printf("Unknown recompiler macro operation %d\n", imlInstruction->operation);
+ 		assert_dbg();
+ 	}
+ 	return false;
+ }
+
+/*
+* Load from memory
+*/
+bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
+{
+ cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32);
+ cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32);
+ if (indexed)
+ cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32);
+
+ IMLRegID realRegisterData = imlInstruction->op_storeLoad.registerData.GetRegID();
+ IMLRegID realRegisterMem = imlInstruction->op_storeLoad.registerMem.GetRegID();
+ IMLRegID realRegisterMem2 = PPC_REC_INVALID_REGISTER;
+ if( indexed )
+ realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2.GetRegID();
+ if( indexed && realRegisterMem == realRegisterMem2 )
+ {
+ return false;
+ }
+ if( indexed && realRegisterData == realRegisterMem2 )
+ {
+ // for indexed memory access realRegisterData must not be the same register as the second memory register,
+ // this can easily be worked around by swapping realRegisterMem and realRegisterMem2
+ std::swap(realRegisterMem, realRegisterMem2);
+ }
+
+ bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend;
+ bool switchEndian = imlInstruction->op_storeLoad.flags2.swapEndian;
+ if( imlInstruction->op_storeLoad.copyWidth == 32 )
+ {
+ if (indexed)
+ {
+ x64Gen_lea_reg64Low32_reg64Low32PlusReg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem, realRegisterMem2);
+ }
+ if( g_CPUFeatures.x86.movbe && switchEndian )
+ {
+ if (indexed)
+ {
+ x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
+ }
+ else
+ {
+ x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
+ }
+ }
+ else
+ {
+ if (indexed)
+ {
+ x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
+ if (switchEndian)
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData);
+ }
+ else
+ {
+ x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
+ if (switchEndian)
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData);
+ }
+ }
+ }
+ else if( imlInstruction->op_storeLoad.copyWidth == 16 )
+ {
+ if (indexed)
+ {
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ }
+ if(g_CPUFeatures.x86.movbe && switchEndian )
+ {
+ x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
+ if( indexed && realRegisterMem != realRegisterData )
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ }
+ else
+ {
+ x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
+ if( indexed && realRegisterMem != realRegisterData )
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ if( switchEndian )
+ x64Gen_rol_reg64Low16_imm8(x64GenContext, realRegisterData, 8);
+ }
+ if( signExtend )
+ x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, realRegisterData, realRegisterData);
+ else
+ x64Gen_movZeroExtend_reg64Low32_reg64Low16(x64GenContext, realRegisterData, realRegisterData);
+ }
+ else if( imlInstruction->op_storeLoad.copyWidth == 8 )
+ {
+ if( indexed )
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ if( signExtend )
+ x64Gen_movSignExtend_reg64Low32_mem8Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
+ else
+ x64Emit_movZX_reg32_mem8(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
+ if( indexed && realRegisterMem != realRegisterData )
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ }
+ else
+ return false;
+ return true;
+}
+
+/*
+* Write to memory
+*/
+bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
+{
+ cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32);
+ cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32);
+ if (indexed)
+ cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32);
+
+ IMLRegID realRegisterData = imlInstruction->op_storeLoad.registerData.GetRegID();
+ IMLRegID realRegisterMem = imlInstruction->op_storeLoad.registerMem.GetRegID();
+ IMLRegID realRegisterMem2 = PPC_REC_INVALID_REGISTER;
+ if (indexed)
+ realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2.GetRegID();
+
+ if (indexed && realRegisterMem == realRegisterMem2)
+ {
+ return false;
+ }
+ if (indexed && realRegisterData == realRegisterMem2)
+ {
+ // for indexed memory access realRegisterData must not be the same register as the second memory register,
+ // this can easily be worked around by swapping realRegisterMem and realRegisterMem2
+ std::swap(realRegisterMem, realRegisterMem2);
+ }
+
+ bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend;
+ bool swapEndian = imlInstruction->op_storeLoad.flags2.swapEndian;
+ if (imlInstruction->op_storeLoad.copyWidth == 32)
+ {
+ uint32 valueRegister;
+ if ((swapEndian == false || g_CPUFeatures.x86.movbe) && realRegisterMem != realRegisterData)
+ {
+ valueRegister = realRegisterData;
+ }
+ else
+ {
+ x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
+ valueRegister = REG_RESV_TEMP;
+ }
+ if (!g_CPUFeatures.x86.movbe && swapEndian)
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, valueRegister);
+ if (indexed)
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ if (g_CPUFeatures.x86.movbe && swapEndian)
+ x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister);
+ else
+ x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister);
+ if (indexed)
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ }
+ else if (imlInstruction->op_storeLoad.copyWidth == 16)
+ {
+ x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
+ if (swapEndian)
+ x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8);
+ if (indexed)
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
+ if (indexed)
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ // todo: Optimize this, e.g. by using MOVBE
+ }
+ else if (imlInstruction->op_storeLoad.copyWidth == 8)
+ {
+ if (indexed && realRegisterMem == realRegisterData)
+ {
+ x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
+ realRegisterData = REG_RESV_TEMP;
+ }
+ if (indexed)
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, realRegisterData);
+ if (indexed)
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ }
+ else
+ return false;
+ return true;
+}
+
+ /*
+  * Emit an atomic compare-and-store (LOCK CMPXCHG) on [memory base + regEA].
+  * regBoolOut receives 1 if the store happened and 0 otherwise.
+  * regBoolOut must be EAX; none of the other operands may be EAX.
+  */
+ void PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+ {
+ 	const auto& operands = imlInstruction->op_atomic_compare_store;
+ 	auto regBoolOut = _reg32_from_reg8(_reg8(operands.regBoolOut));
+ 	auto regEA = _reg32(operands.regEA);
+ 	auto regVal = _reg32(operands.regWriteValue);
+ 	auto regCmp = _reg32(operands.regCompareValue);
+
+ 	// CMPXCHG implicitly uses EAX for the compare value
+ 	cemu_assert_debug(regBoolOut == X86_REG_EAX);
+ 	cemu_assert_debug(regEA != X86_REG_EAX);
+ 	cemu_assert_debug(regVal != X86_REG_EAX);
+ 	cemu_assert_debug(regCmp != X86_REG_EAX);
+
+ 	auto& emitter = *x64GenContext->emitter;
+ 	emitter.MOV_dd(X86_REG_EAX, regCmp);
+ 	emitter.LockPrefix();
+ 	emitter.CMPXCHG_dd_l(REG_RESV_MEMBASE, 0, _reg64_from_reg32(regEA), 1, regVal);
+ 	emitter.SETcc_b(X86Cond::X86_CONDITION_Z, regBoolOut);
+ 	emitter.AND_di32(regBoolOut, 1); // SETcc doesn't clear the upper bits so we do it manually here
+ }
+
+ // Emits an indirect call (through RAX) to the absolute address stored in the IML call instruction.
+ void PPCRecompilerX64Gen_imlInstruction_call_imm(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+ {
+     // Spilling of volatile registers and placement of the call parameters is already done by the
+     // register allocator, so nothing of that sort has to happen here.
+     auto& emit = *x64GenContext->emitter;
+
+     // Stack alignment: the code produced by generateEnterRecompilerCode currently guarantees a
+     // 16 byte aligned stack. Reserving 0x20 bytes keeps that alignment intact while leaving
+     // enough room for any parameters.
+     emit.SUB_qi8(X86_REG_RSP, 0x20);
+     emit.MOV_qi64(X86_REG_RAX, imlInstruction->op_call_imm.callAddress);
+     emit.CALL_q(X86_REG_RAX);
+     // release the reserved stack space again
+     emit.ADD_qi8(X86_REG_RSP, 0x20);
+ }
+
+ // Generates x64 code for two-register IML operations (regR = op(regA), or a compare of regR with regA).
+ // Returns true on success and false (after logging) for operations this function does not handle.
+ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+ {
+     auto dstReg = _reg32(imlInstruction->op_r_r.regR);
+     auto srcReg = _reg32(imlInstruction->op_r_r.regA);
+     const bool needsCopy = dstReg != srcReg;
+
+     switch (imlInstruction->operation)
+     {
+     case PPCREC_IML_OP_ASSIGN:
+         // dst = src
+         if (needsCopy)
+             x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, dstReg, srcReg);
+         break;
+     case PPCREC_IML_OP_ENDIAN_SWAP:
+         // copy first (if needed), then byte-swap in place; with MOVBE this could possibly be a single instruction
+         if (needsCopy)
+             x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, dstReg, srcReg);
+         x64Gen_bswap_reg64Lower32bit(x64GenContext, dstReg);
+         break;
+     case PPCREC_IML_OP_ASSIGN_S8_TO_S32:
+         x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, dstReg, srcReg);
+         break;
+     case PPCREC_IML_OP_ASSIGN_S16_TO_S32:
+         x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, dstReg, reg32ToReg16(srcReg));
+         break;
+     case PPCREC_IML_OP_NOT:
+         // bring the operand into the destination unless it already lives there
+         if (needsCopy)
+             x64Gen_mov_reg64_reg64(x64GenContext, dstReg, srcReg);
+         x64Gen_not_reg64Low32(x64GenContext, dstReg);
+         break;
+     case PPCREC_IML_OP_NEG:
+         // bring the operand into the destination unless it already lives there
+         if (needsCopy)
+             x64Gen_mov_reg64_reg64(x64GenContext, dstReg, srcReg);
+         x64Gen_neg_reg64Low32(x64GenContext, dstReg);
+         break;
+     case PPCREC_IML_OP_CNTLZW:
+     {
+         // count leading zeros
+         if (g_CPUFeatures.x86.lzcnt)
+         {
+             // LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5])
+             x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, dstReg, srcReg);
+             break;
+         }
+         // fallback: BSR path for non-zero inputs, constant 32 for zero
+         x64Gen_test_reg64Low32_reg64Low32(x64GenContext, srcReg, srcReg);
+         const sint32 jumpIfZeroOffset = x64GenContext->emitter->GetWriteIndex();
+         x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0);
+         // non-zero: result = 31 - bsr(src)
+         x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, dstReg, srcReg);
+         x64Gen_neg_reg64Low32(x64GenContext, dstReg);
+         x64Gen_add_reg64Low32_imm32(x64GenContext, dstReg, 32-1);
+         const sint32 jumpToEndOffset = x64GenContext->emitter->GetWriteIndex();
+         x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
+         // zero: result = 32
+         PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpIfZeroOffset, x64GenContext->emitter->GetWriteIndex());
+         x64Gen_mov_reg64Low32_imm32(x64GenContext, dstReg, 32);
+         PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpToEndOffset, x64GenContext->emitter->GetWriteIndex());
+         break;
+     }
+     case PPCREC_IML_OP_X86_CMP:
+         x64GenContext->emitter->CMP_dd(dstReg, srcReg);
+         break;
+     default:
+         cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation);
+         return false;
+     }
+     return true;
+ }
+
+ // Generates x64 code for IML operations taking one register and one 32-bit immediate.
+ // Returns true on success and false (after logging) for operations this function does not handle.
+ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+ {
+     auto targetReg = _reg32(imlInstruction->op_r_immS32.regR);
+
+     switch (imlInstruction->operation)
+     {
+     case PPCREC_IML_OP_ASSIGN:
+         // load the immediate into the register
+         x64Gen_mov_reg64Low32_imm32(x64GenContext, targetReg, (uint32)imlInstruction->op_r_immS32.immS32);
+         break;
+     case PPCREC_IML_OP_LEFT_ROTATE:
+         // the rotate amount is passed on as an 8-bit immediate
+         cemu_assert_debug((imlInstruction->op_r_immS32.immS32 & 0x80) == 0);
+         x64Gen_rol_reg64Low32_imm8(x64GenContext, targetReg, (uint8)imlInstruction->op_r_immS32.immS32);
+         break;
+     case PPCREC_IML_OP_X86_CMP:
+     {
+         sint32 compareValue = imlInstruction->op_r_immS32.immS32;
+         x64GenContext->emitter->CMP_di32(targetReg, compareValue);
+         break;
+     }
+     default:
+         cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation);
+         return false;
+     }
+     return true;
+ }
+
+ // Emits x64 code for three-register integer IML operations (regR = regA <op> regB).
+ // Covers ADD, SUB, OR/AND/XOR, MULTIPLY_SIGNED, SLW/SRW, LEFT_ROTATE, RIGHT_SHIFT_S/RIGHT_SHIFT_U/LEFT_SHIFT,
+ // DIVIDE_SIGNED/DIVIDE_UNSIGNED and MULTIPLY_HIGH_SIGNED/MULTIPLY_HIGH_UNSIGNED.
+ // Returns true on success and false (after logging) if the operation is not handled here.
+ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+ {
+     auto rRegResult = _reg32(imlInstruction->op_r_r_r.regR);
+     auto rRegOperand1 = _reg32(imlInstruction->op_r_r_r.regA);
+     auto rRegOperand2 = _reg32(imlInstruction->op_r_r_r.regB);
+
+     if (imlInstruction->operation == PPCREC_IML_OP_ADD)
+     {
+         // registerResult = registerOperand1 + registerOperand2
+         if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) )
+         {
+             // be careful not to overwrite the operand before we use it
+             if( rRegResult == rRegOperand1 )
+                 x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
+             else
+                 x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1);
+         }
+         else
+         {
+             // copy operand1 to destination register before doing addition
+             x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
+             x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
+         }
+     }
+     else if( imlInstruction->operation == PPCREC_IML_OP_SUB )
+     {
+         if( rRegOperand1 == rRegOperand2 )
+         {
+             // result = operand1 - operand1 -> 0
+             x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult);
+         }
+         else if( rRegResult == rRegOperand1 )
+         {
+             // result = result - operand2
+             x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
+         }
+         else if ( rRegResult == rRegOperand2 )
+         {
+             // result = operand1 - result, computed as (-result) + operand1
+             x64Gen_neg_reg64Low32(x64GenContext, rRegResult);
+             x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1);
+         }
+         else
+         {
+             x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
+             x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
+         }
+     }
+     else if (imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR)
+     {
+         // the operation is commutative, so if the result aliases operand2 simply swap the operands
+         if (rRegResult == rRegOperand2)
+             std::swap(rRegOperand1, rRegOperand2);
+
+         if (rRegResult != rRegOperand1)
+             x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
+
+         if (imlInstruction->operation == PPCREC_IML_OP_OR)
+             x64Gen_or_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
+         else if (imlInstruction->operation == PPCREC_IML_OP_AND)
+             x64Gen_and_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
+         else
+             x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
+     }
+     else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED )
+     {
+         // registerResult = registerOperand1 * registerOperand2
+         if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) )
+         {
+             // be careful not to overwrite the operand before we use it
+             if( rRegResult == rRegOperand1 )
+                 x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
+             else
+                 x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1);
+         }
+         else
+         {
+             // copy operand1 to destination register before doing multiplication
+             x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
+             // multiply by operand2
+             x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
+         }
+     }
+     else if( imlInstruction->operation == PPCREC_IML_OP_SLW || imlInstruction->operation == PPCREC_IML_OP_SRW )
+     {
+         // registerResult = registerOperand1(rA) >> registerOperand2(rB) (up to 63 bits)
+
+         if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SRW)
+         {
+             // use BMI2 SHRX if available
+             x64Gen_shrx_reg64_reg64_reg64(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
+         }
+         else if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SLW)
+         {
+             // use BMI2 SHLX if available
+             x64Gen_shlx_reg64_reg64_reg64(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
+             x64Gen_and_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); // trim result to 32bit
+         }
+         else
+         {
+             // lazy and slow way to do shift by register without relying on ECX/CL or BMI2:
+             // test each bit of the shift amount and conditionally shift by the matching power of two
+             x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1);
+             for (sint32 b = 0; b < 6; b++)
+             {
+                 x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1 << b));
+                 sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex();
+                 x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set
+                 if (b == 5)
+                 {
+                     // shift amount of 32 or more: result is zero
+                     x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
+                 }
+                 else
+                 {
+                     if (imlInstruction->operation == PPCREC_IML_OP_SLW)
+                         x64Gen_shl_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1 << b));
+                     else
+                         x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1 << b));
+                 }
+                 PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex());
+             }
+             x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP);
+         }
+     }
+     else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE )
+     {
+         // todo: Use BMI2 rotate if available
+         // check if CL/ECX/RCX is available
+         if( rRegResult != X86_REG_RCX && rRegOperand1 != X86_REG_RCX && rRegOperand2 != X86_REG_RCX )
+         {
+             // swap operand 2 with RCX
+             x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2);
+             // move operand 1 to temp register
+             x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1);
+             // rotate
+             x64Gen_rol_reg64Low32_cl(x64GenContext, REG_RESV_TEMP);
+             // undo swap operand 2 with RCX
+             x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2);
+             // copy to result register
+             x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP);
+         }
+         else
+         {
+             x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1);
+             // lazy and slow way to do rotate by register without relying on ECX/CL:
+             // test each of the low 5 bits of the rotate amount and conditionally rotate by the matching power of two
+             for (sint32 b = 0; b < 5; b++)
+             {
+                 x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1 << b));
+                 sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex();
+                 x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set
+                 x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1 << b));
+                 PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex());
+             }
+             x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP);
+         }
+     }
+     else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S ||
+         imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U ||
+         imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
+     {
+         if(g_CPUFeatures.x86.bmi2)
+         {
+             if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
+                 x64Gen_sarx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
+             else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
+                 x64Gen_shrx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
+             else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
+                 x64Gen_shlx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
+         }
+         else
+         {
+             // without BMI2 the shift amount has to be in CL
+             cemu_assert_debug(rRegOperand2 == X86_REG_ECX);
+             // if the result register is ECX itself, shift in the temp register so the shift amount is not overwritten early
+             bool useTempReg = rRegResult == X86_REG_ECX && rRegOperand1 != X86_REG_ECX;
+             auto origRegResult = rRegResult;
+             if(useTempReg)
+             {
+                 x64GenContext->emitter->MOV_dd(REG_RESV_TEMP, rRegOperand1);
+                 rRegResult = REG_RESV_TEMP;
+             }
+             if(rRegOperand1 != rRegResult)
+                 x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
+             if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
+                 x64GenContext->emitter->SAR_d_CL(rRegResult);
+             else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
+                 x64GenContext->emitter->SHR_d_CL(rRegResult);
+             else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
+                 x64GenContext->emitter->SHL_d_CL(rRegResult);
+             if(useTempReg)
+                 x64GenContext->emitter->MOV_dd(origRegResult, REG_RESV_TEMP);
+         }
+     }
+     else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED )
+     {
+         // save EAX / EDX since the division uses them implicitly
+         x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX);
+         x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX);
+         // mov operand 2 to temp register
+         x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2);
+         // mov operand1 to EAX
+         x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, X86_REG_EAX, rRegOperand1);
+         // sign or zero extend EAX to EDX:EAX based on division sign mode
+         if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED )
+             x64Gen_cdq(x64GenContext);
+         else
+             x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, X86_REG_EDX, X86_REG_EDX);
+         // make sure we avoid division by zero
+         x64Gen_test_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
+         // NOTE(review): the hard-coded displacement of 3 presumably equals the encoded size of the
+         // DIV/IDIV on REG_RESV_TEMP emitted next - confirm if REG_RESV_TEMP or the encoding ever changes
+         x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 3);
+         // divide
+         if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED )
+             x64Gen_idiv_reg64Low32(x64GenContext, REG_RESV_TEMP);
+         else
+             x64Gen_div_reg64Low32(x64GenContext, REG_RESV_TEMP);
+         // result of division is now stored in EAX, move it to result register
+         if( rRegResult != X86_REG_EAX )
+             x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, X86_REG_EAX);
+         // restore EAX / EDX (unless one of them holds the result)
+         if( rRegResult != X86_REG_RAX )
+             x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]));
+         if( rRegResult != X86_REG_RDX )
+             x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EDX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]));
+     }
+     else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED )
+     {
+         // save EAX / EDX since the widening multiply uses them implicitly
+         x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX);
+         x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX);
+         // mov operand 2 to temp register
+         x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2);
+         // mov operand1 to EAX
+         x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, X86_REG_EAX, rRegOperand1);
+         // NOTE(review): EDX is zeroed for the signed case and sign-extended for the unsigned case, which
+         // looks inverted. One-operand MUL/IMUL write their result to EDX:EAX, so the prior EDX value
+         // presumably does not matter - confirm before simplifying.
+         if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED )
+         {
+             // zero EDX
+             x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, X86_REG_EDX, X86_REG_EDX);
+         }
+         else
+         {
+             // sign extend EAX to EDX:EAX
+             x64Gen_cdq(x64GenContext);
+         }
+         // multiply
+         if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED )
+             x64Gen_imul_reg64Low32(x64GenContext, REG_RESV_TEMP);
+         else
+             x64Gen_mul_reg64Low32(x64GenContext, REG_RESV_TEMP);
+         // result of multiplication is now stored in EDX:EAX, move the high half (EDX) to result register
+         if( rRegResult != X86_REG_EDX )
+             x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, X86_REG_EDX);
+         // restore EAX / EDX (unless one of them holds the result)
+         if( rRegResult != X86_REG_RAX )
+             x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]));
+         if( rRegResult != X86_REG_RDX )
+             x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EDX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]));
+     }
+     else
+     {
+         cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation);
+         return false;
+     }
+     return true;
+ }
+
+ // Emits x64 code for register+register additions that also produce a carry output (ADD and ADD_WITH_CARRY).
+ // Returns false for any other operation.
+ bool PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+ {
+     auto& carryOp = imlInstruction->op_r_r_r_carry;
+     auto sumReg = _reg32(carryOp.regR);
+     auto lhsReg = _reg32(carryOp.regA);
+     auto rhsReg = _reg32(carryOp.regB);
+     auto carryReg = _reg32(carryOp.regCarry);
+     // when the carry output aliases an input it cannot be cleared before the addition
+     const bool carryAliasesInput = (carryReg == lhsReg) || (carryReg == rhsReg);
+     cemu_assert_debug(carryReg != sumReg); // two outputs sharing the same register is undefined behavior
+
+     const bool isPlainAdd = imlInstruction->operation == PPCREC_IML_OP_ADD;
+     const bool isAddWithCarry = imlInstruction->operation == PPCREC_IML_OP_ADD_WITH_CARRY;
+     if (!isPlainAdd && !isAddWithCarry)
+     {
+         cemu_assert_unimplemented();
+         return false;
+     }
+
+     auto& emit = *x64GenContext->emitter;
+
+     // shared prologue: get the first addend into the result register, swapping if the result aliases the second addend
+     if (rhsReg == sumReg)
+         std::swap(rhsReg, lhsReg);
+     if (sumReg != lhsReg)
+         emit.MOV_dd(sumReg, lhsReg);
+
+     if (isPlainAdd)
+     {
+         if (!carryAliasesInput)
+             emit.XOR_dd(carryReg, carryReg);
+         emit.ADD_dd(sumReg, rhsReg);
+         emit.SETcc_b(X86_CONDITION_B, _reg8_from_reg32(carryReg)); // below condition checks carry flag
+         if (carryAliasesInput)
+             emit.AND_di8(carryReg, 1); // clear upper bits
+     }
+     else
+     {
+         // assumes that carry is already correctly initialized as 0 or 1
+         emit.BT_du8(carryReg, 0); // copy carry register to x86 carry flag
+         emit.ADC_dd(sumReg, rhsReg);
+         emit.SETcc_b(X86_CONDITION_B, _reg8_from_reg32(carryReg));
+     }
+     return true;
+ }
+
+ // Returns true if both instructions are compares of the same type with identical inputs
+ // (regA and regB for COMPARE, regA and the immediate for COMPARE_S32).
+ bool PPCRecompilerX64Gen_IsSameCompare(IMLInstruction* imlInstructionA, IMLInstruction* imlInstructionB)
+ {
+     const bool sameType = imlInstructionA->type == imlInstructionB->type;
+     if (!sameType)
+         return false;
+     if (imlInstructionA->type == PPCREC_IML_TYPE_COMPARE)
+     {
+         auto& first = imlInstructionA->op_compare;
+         auto& second = imlInstructionB->op_compare;
+         return first.regA == second.regA && first.regB == second.regB;
+     }
+     if (imlInstructionA->type == PPCREC_IML_TYPE_COMPARE_S32)
+     {
+         auto& first = imlInstructionA->op_compare_s32;
+         auto& second = imlInstructionB->op_compare_s32;
+         return first.regA == second.regA && first.immS32 == second.immS32;
+     }
+     // any other instruction type is not a compare
+     return false;
+ }
+
+ // Emits x64 code for a COMPARE / COMPARE_S32 instruction.
+ // Up to three directly following instructions that compare the same inputs are folded in, so that a
+ // single CMP is followed by one SETcc per instruction.
+ // extraInstructionsProcessed receives the number of following instructions that were consumed this way.
+ // Always returns true.
+ bool PPCRecompilerX64Gen_imlInstruction_compare_x(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, sint32& extraInstructionsProcessed)
+ {
+     extraInstructionsProcessed = 0;
+     // capacity 4: the current instruction plus at most three followers (see loop bound below)
+     boost::container::static_vector<IMLInstruction*, 4> compareInstructions;
+     compareInstructions.push_back(imlInstruction);
+     for(sint32 i=1; i<4; i++)
+     {
+         IMLInstruction* nextIns = x64GenContext->GetNextInstruction(i);
+         if(!nextIns || !PPCRecompilerX64Gen_IsSameCompare(imlInstruction, nextIns))
+             break;
+         compareInstructions.push_back(nextIns);
+     }
+     // true if the result register of the compare is also one of its inputs
+     auto OperandOverlapsWithR = [&](IMLInstruction* ins) -> bool
+     {
+         cemu_assert_debug(ins->type == PPCREC_IML_TYPE_COMPARE || ins->type == PPCREC_IML_TYPE_COMPARE_S32);
+         if(ins->type == PPCREC_IML_TYPE_COMPARE)
+             return _reg32_from_reg8(_reg8(ins->op_compare.regR)) == _reg32(ins->op_compare.regA) || _reg32_from_reg8(_reg8(ins->op_compare.regR)) == _reg32(ins->op_compare.regB);
+         else /* PPCREC_IML_TYPE_COMPARE_S32 */
+             return _reg32_from_reg8(_reg8(ins->op_compare_s32.regR)) == _reg32(ins->op_compare_s32.regA);
+     };
+     // result register of the compare as a 32-bit register
+     auto GetRegR = [](IMLInstruction* insn)
+     {
+         return insn->type == PPCREC_IML_TYPE_COMPARE ? _reg32_from_reg8(_reg8(insn->op_compare.regR)) : _reg32_from_reg8(_reg8(insn->op_compare_s32.regR));
+     };
+     // prefer XOR method for zeroing out registers if possible
+     // (not possible when the result register is also an input, since the CMP has not been emitted yet)
+     for(auto& it : compareInstructions)
+     {
+         if(OperandOverlapsWithR(it))
+             continue;
+         auto regR = GetRegR(it);
+         x64GenContext->emitter->XOR_dd(regR, regR); // zero bytes unaffected by SETcc
+     }
+     // emit the compare instruction
+     if(imlInstruction->type == PPCREC_IML_TYPE_COMPARE)
+     {
+         auto regA = _reg32(imlInstruction->op_compare.regA);
+         auto regB = _reg32(imlInstruction->op_compare.regB);
+         x64GenContext->emitter->CMP_dd(regA, regB);
+     }
+     else if(imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32)
+     {
+         auto regA = _reg32(imlInstruction->op_compare_s32.regA);
+         sint32 imm = imlInstruction->op_compare_s32.immS32;
+         x64GenContext->emitter->CMP_di32(regA, imm);
+     }
+     // emit the SETcc instructions
+     // NOTE(review): regR and cond are read through op_compare even for COMPARE_S32 instructions; this
+     // presumably relies on both variants storing these fields at the same place - verify against IMLInstruction
+     for(auto& it : compareInstructions)
+     {
+         auto regR = _reg8(it->op_compare.regR);
+         X86Cond cond = _x86Cond(it->op_compare.cond);
+         // overlapping result registers were not zeroed above; clear them now with MOV, after the CMP
+         if(OperandOverlapsWithR(it))
+             x64GenContext->emitter->MOV_di32(_reg32_from_reg8(regR), 0);
+         x64GenContext->emitter->SETcc_b(cond, regR);
+     }
+     extraInstructionsProcessed = (sint32)compareInstructions.size() - 1;
+     return true;
+ }
+
+ // Emits a conditional jump to the segment's branch-taken successor, depending on whether the bool register is zero.
+ // The jump is emitted with a placeholder displacement of 0 and recorded as a relocatable offset. Always returns true.
+ bool PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment)
+ {
+     auto& jumpOp = imlInstruction->op_conditional_jump;
+     auto& emit = *x64GenContext->emitter;
+     auto flagReg = _reg8(jumpOp.registerBool);
+     bool jumpWhenSet = jumpOp.mustBeTrue;
+
+     // set ZF according to the bool register
+     emit.TEST_bb(flagReg, flagReg);
+     PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
+     if (jumpWhenSet)
+         emit.Jcc_j32(X86_CONDITION_NZ, 0);
+     else
+         emit.Jcc_j32(X86_CONDITION_Z, 0);
+     return true;
+ }
+
+ // Emits a conditional jump on the current x86 flags to the segment's branch-taken successor.
+ // The jump is emitted with a placeholder displacement of 0 and recorded as a relocatable offset.
+ void PPCRecompilerX64Gen_imlInstruction_x86_eflags_jcc(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment)
+ {
+     auto& jccOp = imlInstruction->op_x86_eflags_jcc;
+     // translate the IML condition (optionally inverted) into an x86 condition code
+     X86Cond jumpCondition = _x86Cond(jccOp.cond, jccOp.invertedCondition);
+     PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
+     auto& emit = *x64GenContext->emitter;
+     emit.Jcc_j32(jumpCondition, 0);
+ }
+
+ // Emits an unconditional jump to the segment's branch-taken successor.
+ // The jump is emitted with a placeholder displacement of 0 and recorded as a relocatable offset. Always returns true.
+ bool PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment)
+ {
+     auto* jumpTarget = imlSegment->nextSegmentBranchTaken;
+     PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, jumpTarget);
+     auto& emit = *x64GenContext->emitter;
+     emit.JMP_j32(0);
+     return true;
+ }
+
+ // Generates x64 code for IML operations of the form regR = regA <op> immediate.
+ // Returns true on success and false (after printing a debug message) for operations this function does not handle.
+ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+ {
+     auto dstReg = _reg32(imlInstruction->op_r_r_s32.regR);
+     auto srcReg = _reg32(imlInstruction->op_r_r_s32.regA);
+     // the immediate reinterpreted as an unsigned 32-bit pattern
+     uint32 immBits = imlInstruction->op_r_r_s32.immS32;
+
+     switch (imlInstruction->operation)
+     {
+     case PPCREC_IML_OP_ADD:
+     {
+         uint32 addend = (uint32)imlInstruction->op_r_r_s32.immS32;
+         if (dstReg != srcReg)
+             x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, dstReg, srcReg);
+         x64Gen_add_reg64Low32_imm32(x64GenContext, dstReg, (uint32)addend);
+         break;
+     }
+     case PPCREC_IML_OP_SUB:
+         if (dstReg != srcReg)
+             x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, dstReg, srcReg);
+         x64Gen_sub_reg64Low32_imm32(x64GenContext, dstReg, immBits);
+         break;
+     case PPCREC_IML_OP_AND:
+     case PPCREC_IML_OP_OR:
+     case PPCREC_IML_OP_XOR:
+         // shared copy step, then the specific bitwise operation
+         if (dstReg != srcReg)
+             x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, dstReg, srcReg);
+         if (imlInstruction->operation == PPCREC_IML_OP_AND)
+             x64Gen_and_reg64Low32_imm32(x64GenContext, dstReg, immBits);
+         else if (imlInstruction->operation == PPCREC_IML_OP_OR)
+             x64Gen_or_reg64Low32_imm32(x64GenContext, dstReg, immBits);
+         else
+             x64Gen_xor_reg64Low32_imm32(x64GenContext, dstReg, immBits);
+         break;
+     case PPCREC_IML_OP_MULTIPLY_SIGNED:
+     {
+         // dst = src * immediate; the factor is loaded into the temp register first
+         sint32 factor = (uint32)imlInstruction->op_r_r_s32.immS32;
+         x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (sint64)factor); // todo: Optimize
+         if (dstReg != srcReg)
+             x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, dstReg, srcReg);
+         x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, dstReg, REG_RESV_TEMP);
+         break;
+     }
+     case PPCREC_IML_OP_LEFT_SHIFT:
+     case PPCREC_IML_OP_RIGHT_SHIFT_U:
+     case PPCREC_IML_OP_RIGHT_SHIFT_S:
+         // shared copy step, then the specific shift by an immediate amount
+         if (srcReg != dstReg)
+             x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, dstReg, srcReg);
+         if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
+             x64Gen_shl_reg64Low32_imm8(x64GenContext, dstReg, imlInstruction->op_r_r_s32.immS32);
+         else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
+             x64Gen_shr_reg64Low32_imm8(x64GenContext, dstReg, imlInstruction->op_r_r_s32.immS32);
+         else
+             x64Gen_sar_reg64Low32_imm8(x64GenContext, dstReg, imlInstruction->op_r_r_s32.immS32);
+         break;
+     default:
+         debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation);
+         return false;
+     }
+     return true;
+ }
+
+ // Emits x64 code for register+immediate additions that also produce a carry output (ADD and ADD_WITH_CARRY).
+ // Returns false for any other operation.
+ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+ {
+     auto& carryOp = imlInstruction->op_r_r_s32_carry;
+     auto sumReg = _reg32(carryOp.regR);
+     auto srcReg = _reg32(carryOp.regA);
+     sint32 addend = carryOp.immS32;
+     auto carryReg = _reg32(carryOp.regCarry);
+     cemu_assert_debug(carryReg != sumReg); // we dont allow two different outputs sharing the same register
+
+     // if the carry register doubles as the input it can only be cleared after the input has been consumed
+     const bool clearCarryLate = (carryReg == srcReg);
+
+     if (imlInstruction->operation == PPCREC_IML_OP_ADD)
+     {
+         auto& emit = *x64GenContext->emitter;
+         if (!clearCarryLate)
+             emit.XOR_dd(carryReg, carryReg);
+         if (sumReg != srcReg)
+             emit.MOV_dd(sumReg, srcReg);
+         emit.ADD_di32(sumReg, addend);
+         // the late clear uses MOV instead of XOR; it sits between the ADD and the SETcc that reads the carry flag
+         if (clearCarryLate)
+             emit.MOV_di32(carryReg, 0);
+         emit.SETcc_b(X86_CONDITION_B, _reg8_from_reg32(carryReg));
+         return true;
+     }
+     if (imlInstruction->operation == PPCREC_IML_OP_ADD_WITH_CARRY)
+     {
+         // assumes that carry is already correctly initialized as 0 or 1
+         cemu_assert_debug(carryReg != sumReg);
+         auto& emit = *x64GenContext->emitter;
+         if (sumReg != srcReg)
+             emit.MOV_dd(sumReg, srcReg);
+         emit.BT_du8(carryReg, 0); // copy carry register to x86 carry flag
+         emit.ADC_di32(sumReg, addend);
+         emit.SETcc_b(X86_CONDITION_B, _reg8_from_reg32(carryReg));
+         return true;
+     }
+     cemu_assert_unimplemented();
+     return false;
+ }
+
+ // Emits a check of the remaining-cycles counter and a conditional jump to the current segment's
+ // branch-taken successor when the counter is negative. Always returns true.
+ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+ {
+     // Benchmark notes (all measured on an i7-4790K):
+     //  - DEC [mem] + JNS is significantly slower than BT + JNC (probably because of the extra memory write and the direct dependency)
+     //  - CMP [mem], 0 + JG is about equal to (or slightly slower than) BT + JNC
+
+     // BT on bit 31 of remainingCycles: check if negative
+     x64Gen_bt_mem8(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, remainingCycles), 31);
+
+     auto& activeSegment = *x64GenContext->currentSegment;
+     cemu_assert_debug(activeSegment.GetBranchTaken());
+     PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, activeSegment.GetBranchTaken());
+     // placeholder displacement of 0; the offset was recorded as relocatable above
+     x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0);
+     return true;
+ }
+
+ // Emits x64 code that loads a named value from the PPCInterpreter_t state (addressed via REG_RESV_HCPU)
+ // into a host register.
+ // For I64 registers the name may be a GPR, a supported SPR (LR, CTR, XER, UGQR0-7), a temporary GPR,
+ // XER_CA, XER_SO, a CR bit or one of the reservation values (MEMRES_EA / MEMRES_VAL).
+ // For F64 registers the name may be an FPR half or a temporary FPR.
+ // Unknown names only trigger a debug assert / debug break; no code is emitted for them.
+ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+ {
+     uint32 name = imlInstruction->op_r_name.name;
+     if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::I64)
+     {
+         auto regR = _reg64(imlInstruction->op_r_name.regR);
+         if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32)
+         {
+             // general purpose register, stored as 32-bit entries of the gpr array
+             x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0));
+         }
+         else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999)
+         {
+             // special purpose register; only a subset is supported
+             sint32 sprIndex = (name - PPCREC_NAME_SPR0);
+             if (sprIndex == SPR_LR)
+                 x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.LR));
+             else if (sprIndex == SPR_CTR)
+                 x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.CTR));
+             else if (sprIndex == SPR_XER)
+                 x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.XER));
+             else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
+             {
+                 sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0);
+                 x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, memOffset);
+             }
+             else
+                 assert_dbg();
+         }
+         else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
+         {
+             x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY));
+         }
+         else if (name == PPCREC_NAME_XER_CA)
+         {
+             // single byte, zero-extended on load
+             x64Emit_movZX_reg64_mem8(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_ca));
+         }
+         else if (name == PPCREC_NAME_XER_SO)
+         {
+             x64Emit_movZX_reg64_mem8(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_so));
+         }
+         else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
+         {
+             // CR bits are stored one byte per bit
+             x64Emit_movZX_reg64_mem8(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR));
+         }
+         else if (name == PPCREC_NAME_CPU_MEMRES_EA)
+         {
+             x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr));
+         }
+         else if (name == PPCREC_NAME_CPU_MEMRES_VAL)
+         {
+             x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue));
+         }
+         else
+             assert_dbg();
+     }
+     else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64)
+     {
+         auto regR = _regF64(imlInstruction->op_r_name.regR);
+         if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64))
+         {
+             // two names per FPR: even/odd selects which double of the pair is loaded
+             sint32 regIndex = (name - PPCREC_NAME_FPR_HALF) / 2;
+             sint32 pairIndex = (name - PPCREC_NAME_FPR_HALF) % 2;
+             x64Gen_movsd_xmmReg_memReg64(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + pairIndex * sizeof(double));
+         }
+         else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
+         {
+             // range check must use && - with || every name matched, which made the assert below
+             // unreachable and allowed offsets outside of the temporaryFPR range
+             x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));
+         }
+         else
+         {
+             cemu_assert_debug(false);
+         }
+     }
+     else
+         DEBUG_BREAK;
+
+ }
+
+// Emits the x64 store that writes the host register backing an IML register out to the
+// PPCInterpreter_t field selected by op_r_name.name (addressed relative to REG_RESV_HCPU).
+// Integer (I64) registers are stored as 32-bit or 8-bit values depending on the target field,
+// F64 registers as a single double (FPR halves) or a full 128-bit value (temporary FPRs).
+// PPCRecFunction and ppcImlGenContext are not used by this function.
+void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+	const uint32 name = imlInstruction->op_r_name.name;
+	const auto regFormat = imlInstruction->op_r_name.regR.GetBaseFormat();
+
+	if (regFormat == IMLRegFormat::I64)
+	{
+		auto srcReg = _reg64(imlInstruction->op_r_name.regR);
+
+		// general purpose registers
+		if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32)
+		{
+			x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0), srcReg);
+			return;
+		}
+		// special purpose registers (only LR, CTR, XER and UGQR0-7 are supported)
+		if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999)
+		{
+			const uint32 sprIndex = (name - PPCREC_NAME_SPR0);
+			if (sprIndex == SPR_LR)
+			{
+				x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.LR), srcReg);
+			}
+			else if (sprIndex == SPR_CTR)
+			{
+				x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.CTR), srcReg);
+			}
+			else if (sprIndex == SPR_XER)
+			{
+				x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.XER), srcReg);
+			}
+			else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
+			{
+				sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0);
+				x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, memOffset, srcReg);
+			}
+			else
+			{
+				assert_dbg();
+			}
+			return;
+		}
+		// temporary GPR slots
+		if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
+		{
+			x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), srcReg);
+			return;
+		}
+		// XER carry / summary overflow are stored as single bytes
+		if (name == PPCREC_NAME_XER_CA)
+		{
+			x64GenContext->emitter->MOV_bb_l(REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg64(srcReg));
+			return;
+		}
+		if (name == PPCREC_NAME_XER_SO)
+		{
+			x64GenContext->emitter->MOV_bb_l(REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg64(srcReg));
+			return;
+		}
+		// condition register entries, one byte each
+		if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
+		{
+			x64GenContext->emitter->MOV_bb_l(REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg64(srcReg));
+			return;
+		}
+		// memory reservation address / value
+		if (name == PPCREC_NAME_CPU_MEMRES_EA)
+		{
+			x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr), srcReg);
+			return;
+		}
+		if (name == PPCREC_NAME_CPU_MEMRES_VAL)
+		{
+			x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue), srcReg);
+			return;
+		}
+		// unknown integer register name
+		assert_dbg();
+		return;
+	}
+
+	if (regFormat == IMLRegFormat::F64)
+	{
+		auto srcReg = _regF64(imlInstruction->op_r_name.regR);
+
+		if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64))
+		{
+			// two consecutive names per FPR: even selects the first double, odd the second
+			const sint32 regIndex = (name - PPCREC_NAME_FPR_HALF) / 2;
+			const sint32 pairIndex = (name - PPCREC_NAME_FPR_HALF) % 2;
+			x64Gen_movsd_memReg64_xmmReg(x64GenContext, srcReg, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + (pairIndex ? sizeof(double) : 0));
+			return;
+		}
+		if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
+		{
+			x64Gen_movupd_memReg128_xmmReg(x64GenContext, srcReg, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));
+			return;
+		}
+		// unknown floating point register name
+		cemu_assert_debug(false);
+		return;
+	}
+
+	// register format is neither I64 nor F64
+	DEBUG_BREAK;
+}
+
+uint8* codeMemoryBlock = nullptr; // base of the executable block that PPCRecompilerX86_allocateExecutableMemory currently hands out memory from
+sint32 codeMemoryBlockIndex = 0; // byte offset of the next free position inside codeMemoryBlock
+sint32 codeMemoryBlockSize = 0; // size in bytes of codeMemoryBlock; 0 until the first block has been allocated
+
+std::mutex mtx_allocExecutableMemory; // held for the whole of PPCRecompilerX86_allocateExecutableMemory, which reads and updates the three variables above
+
+// Reserves `size` bytes of read/write/execute memory and returns a pointer to them.
+//
+// Allocations are carved sequentially out of a shared block (at least 4MB) and a fresh block
+// is mapped whenever the current one cannot hold the request. The previous block is not freed
+// here; earlier allocations handed out from it stay valid.
+// Every allocation starts on a 4 byte boundary: the reservation is rounded up to a multiple of
+// 4 and the bytes between `size` and the rounded size are filled with NOP (0x90).
+// The rounding happens BEFORE the capacity check so that the padding bytes are always part of
+// the reserved range and can never be written past the end of the block.
+// Access to the shared block state is serialized by mtx_allocExecutableMemory.
+uint8* PPCRecompilerX86_allocateExecutableMemory(sint32 size)
+{
+	std::lock_guard lck(mtx_allocExecutableMemory);
+	// size including the alignment padding
+	const sint32 paddedSize = (size + 3) & ~3;
+	if (codeMemoryBlockIndex + paddedSize > codeMemoryBlockSize)
+	{
+		// allocate new block
+		codeMemoryBlockSize = std::max(1024*1024*4, paddedSize+1024); // 4MB (or more if the function is larger than 4MB)
+		codeMemoryBlockIndex = 0;
+		codeMemoryBlock = (uint8*)MemMapper::AllocateMemory(nullptr, codeMemoryBlockSize, MemMapper::PAGE_PERMISSION::P_RWX);
+	}
+	uint8* codeMem = codeMemoryBlock + codeMemoryBlockIndex;
+	// pad to 4 byte alignment with NOPs
+	for (sint32 padIndex = size; padIndex < paddedSize; padIndex++)
+		codeMem[padIndex] = 0x90;
+	codeMemoryBlockIndex += paddedSize;
+	return codeMem;
+}
+
+// Translates every IML instruction of every segment into x64 machine code, resolves the jump
+// relocations recorded during emission and copies the result into executable memory.
+//
+// On success PPCRecFunction->x86Code / x86Size are set to the emitted code and true is returned.
+// If any instruction handler reports failure, false is returned and no executable memory is
+// allocated. Unsupported IML types are reported via debug_printf + assert_dbg.
+bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext)
+{
+	x64GenContext_t x64GenContext{};
+
+	// generate iml instruction code
+	bool codeGenerationFailed = false;
+	for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
+	{
+		x64GenContext.currentSegment = segIt;
+		// remember where this segment's code begins, jumps to the segment are resolved against it
+		segIt->x64Offset = x64GenContext.emitter->GetWriteIndex();
+		for (size_t i = 0; i < segIt->imlList.size(); i++)
+		{
+			x64GenContext.m_currentInstructionEmitIndex = i;
+			IMLInstruction* imlInstruction = segIt->imlList.data() + i;
+			const auto imlType = imlInstruction->type;
+
+			if (imlType == PPCREC_IML_TYPE_R_NAME)
+				PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+			else if (imlType == PPCREC_IML_TYPE_NAME_R)
+				PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+			else if (imlType == PPCREC_IML_TYPE_R_R)
+				codeGenerationFailed |= !PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+			else if (imlType == PPCREC_IML_TYPE_R_S32)
+				codeGenerationFailed |= !PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+			else if (imlType == PPCREC_IML_TYPE_R_R_S32)
+				codeGenerationFailed |= !PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+			else if (imlType == PPCREC_IML_TYPE_R_R_S32_CARRY)
+				codeGenerationFailed |= !PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+			else if (imlType == PPCREC_IML_TYPE_R_R_R)
+				codeGenerationFailed |= !PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+			else if (imlType == PPCREC_IML_TYPE_R_R_R_CARRY)
+				codeGenerationFailed |= !PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+			else if (imlType == PPCREC_IML_TYPE_COMPARE || imlType == PPCREC_IML_TYPE_COMPARE_S32)
+			{
+				// the compare handler may consume following instructions as well; skip those here
+				sint32 extraInstructionsProcessed = 0;
+				PPCRecompilerX64Gen_imlInstruction_compare_x(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, extraInstructionsProcessed);
+				i += extraInstructionsProcessed;
+			}
+			else if (imlType == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
+				codeGenerationFailed |= !PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt);
+			else if (imlType == PPCREC_IML_TYPE_X86_EFLAGS_JCC)
+				PPCRecompilerX64Gen_imlInstruction_x86_eflags_jcc(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt);
+			else if (imlType == PPCREC_IML_TYPE_JUMP)
+				codeGenerationFailed |= !PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt);
+			else if (imlType == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
+				PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+			else if (imlType == PPCREC_IML_TYPE_MACRO)
+				codeGenerationFailed |= !PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+			else if (imlType == PPCREC_IML_TYPE_LOAD)
+				codeGenerationFailed |= !PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false);
+			else if (imlType == PPCREC_IML_TYPE_LOAD_INDEXED)
+				codeGenerationFailed |= !PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true);
+			else if (imlType == PPCREC_IML_TYPE_STORE)
+				codeGenerationFailed |= !PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false);
+			else if (imlType == PPCREC_IML_TYPE_STORE_INDEXED)
+				codeGenerationFailed |= !PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true);
+			else if (imlType == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
+				PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+			else if (imlType == PPCREC_IML_TYPE_CALL_IMM)
+				PPCRecompilerX64Gen_imlInstruction_call_imm(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+			else if (imlType == PPCREC_IML_TYPE_NO_OP)
+			{
+				// no op
+			}
+			else if (imlType == PPCREC_IML_TYPE_FPR_LOAD)
+				codeGenerationFailed |= !PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false);
+			else if (imlType == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
+				codeGenerationFailed |= !PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true);
+			else if (imlType == PPCREC_IML_TYPE_FPR_STORE)
+				codeGenerationFailed |= !PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false);
+			else if (imlType == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
+				codeGenerationFailed |= !PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true);
+			else if (imlType == PPCREC_IML_TYPE_FPR_R_R)
+				PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+			else if (imlType == PPCREC_IML_TYPE_FPR_R_R_R)
+				PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+			else if (imlType == PPCREC_IML_TYPE_FPR_R_R_R_R)
+				PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+			else if (imlType == PPCREC_IML_TYPE_FPR_R)
+				PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+			else if (imlType == PPCREC_IML_TYPE_FPR_COMPARE)
+				PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+			else
+			{
+				debug_printf("PPCRecompiler_generateX64Code(): Unsupported iml type 0x%x\n", imlInstruction->type);
+				assert_dbg();
+			}
+		}
+	}
+	// handle failed code generation
+	if (codeGenerationFailed)
+	{
+		return false;
+	}
+	// allocate executable memory
+	uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes());
+	size_t baseAddress = (size_t)executableMemory;
+	// fix relocs (patched in the emitter buffer, before the code is copied out)
+	for (auto& relocIt : x64GenContext.relocateOffsetTable2)
+	{
+		// extraInfo holds the destination segment, the jump targets the start of that segment's code
+		IMLSegment* destSegment = (IMLSegment*)relocIt.extraInfo;
+		uint32 x64Offset = destSegment->x64Offset;
+
+		uint32 relocBase = relocIt.offset;
+		uint8* relocInstruction = x64GenContext.emitter->GetBufferPtr()+relocBase;
+		if (relocInstruction[0] == 0x0F && (relocInstruction[1] >= 0x80 && relocInstruction[1] <= 0x8F))
+		{
+			// Jcc relativeImm32 (6 bytes)
+			// distance as seen from the end of the 2 byte short form
+			sint32 distanceNearJump = (sint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 2));
+			if (distanceNearJump >= -128 && distanceNearJump < 127)
+			{
+				// target is within rel8 range: convert to the 2 byte short Jcc (opcode 0x70 + condition)
+				*(uint8*)(relocInstruction + 0) = (uint8)(relocInstruction[1]-0x80 + 0x70);
+				// patch offset
+				*(uint8*)(relocInstruction + 1) = (uint8)distanceNearJump;
+				// replace unused 4 bytes with NOP instruction
+				relocInstruction[2] = 0x0F;
+				relocInstruction[3] = 0x1F;
+				relocInstruction[4] = 0x40;
+				relocInstruction[5] = 0x00;
+			}
+			else
+			{
+				// patch offset (relative to the end of the 6 byte instruction)
+				*(uint32*)(relocInstruction + 2) = (uint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 6));
+			}
+		}
+		else if (relocInstruction[0] == 0xE9)
+		{
+			// JMP relativeImm32 (5 bytes)
+			*(uint32*)(relocInstruction+1) = (uint32)((baseAddress+x64Offset)-(baseAddress+relocBase+5));
+		}
+		else
+			assert_dbg();
+	}
+
+	// copy code to executable memory
+	std::span codeBuffer = x64GenContext.emitter->GetBuffer();
+	memcpy(executableMemory, codeBuffer.data(), codeBuffer.size_bytes());
+	// set code
+	PPCRecFunction->x86Code = executableMemory;
+	PPCRecFunction->x86Size = codeBuffer.size_bytes();
+	return true;
+}
+
+void PPCRecompilerX64Gen_generateEnterRecompilerCode() // emits the recompiler entry stub and publishes it as PPCRecompiler_enterRecompilerCode
+{
+ x64GenContext_t x64GenContext{};
+
+ // start of recompiler entry function: save 15 general purpose registers (every GPR except RSP); the exit sequence below pops them in reverse order
+ x64Gen_push_reg64(&x64GenContext, X86_REG_RAX);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_RCX);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_RDX);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_RBX);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_RBP);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_RDI);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_RSI);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_R8);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_R9);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_R10);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_R11);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_R12);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_R13);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_R14);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_R15);
+
+ // E8 00 00 00 00 = CALL with a zero displacement: pushes the address of the next instruction (the ADD below) and continues there
+ x64Gen_writeU8(&x64GenContext, 0xE8);
+ x64Gen_writeU8(&x64GenContext, 0x00);
+ x64Gen_writeU8(&x64GenContext, 0x00);
+ x64Gen_writeU8(&x64GenContext, 0x00);
+ x64Gen_writeU8(&x64GenContext, 0x00);
+ // 48 83 04 24 ib = ADD qword ptr [rsp], imm8: advances the address pushed by the CALL so that it points at the exit sequence
+ x64Gen_writeU8(&x64GenContext, 0x48);
+ x64Gen_writeU8(&x64GenContext, 0x83);
+ x64Gen_writeU8(&x64GenContext, 0x04);
+ x64Gen_writeU8(&x64GenContext, 0x24);
+ uint32 jmpPatchOffset = x64GenContext.emitter->GetWriteIndex(); // buffer position of the ADD's imm8 operand, patched further down
+ x64Gen_writeU8(&x64GenContext, 0); // placeholder imm8; the distance is only known after the JMP has been emitted
+ x64Emit_mov_mem64_reg64(&x64GenContext, X86_REG_RDX, offsetof(PPCInterpreter_t, rspTemp), X86_REG_RSP); // save the host stack pointer to rspTemp of the interpreter instance that RDX points to
+
+ // MOV RSP, RDX (ppc interpreter instance; REG_RESV_HCPU is RSP)
+ x64Gen_mov_reg64_reg64(&x64GenContext, REG_RESV_HCPU, X86_REG_RDX);
+ // MOV R15, ppcRecompilerInstanceData (REG_RESV_RECDATA is R15)
+ x64Gen_mov_reg64_imm64(&x64GenContext, REG_RESV_RECDATA, (uint64)ppcRecompilerInstanceData);
+ // MOV R13, memory_base (REG_RESV_MEMBASE is R13)
+ x64Gen_mov_reg64_imm64(&x64GenContext, REG_RESV_MEMBASE, (uint64)memory_base);
+
+ // JMP recFunc (jump into the recompiled code whose address was passed in RCX)
+ x64Gen_jmp_reg64(&x64GenContext, X86_REG_RCX); // call argument 1
+
+ x64GenContext.emitter->GetBuffer()[jmpPatchOffset] = (x64GenContext.emitter->GetWriteIndex() -(jmpPatchOffset-4)); // imm8 = distance from the start of the ADD (jmpPatchOffset-4, the address pushed by the CALL) to the exit sequence that starts here
+
+ // recompilerExit1: exit sequence; the leave stub reloads RSP from rspTemp and executes RET, which continues at the adjusted address, i.e. here
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_R15);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_R14);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_R13);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_R12);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_R11);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_R10);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_R9);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_R8);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_RSI);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_RDI);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_RBP);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_RBX);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_RDX);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_RCX);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_RAX);
+ // RET (with all saved registers popped, this returns to the caller of the entry stub)
+ x64Gen_ret(&x64GenContext);
+
+ uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes());
+ // copy code to executable memory
+ memcpy(executableMemory, x64GenContext.emitter->GetBuffer().data(), x64GenContext.emitter->GetBuffer().size_bytes());
+ PPCRecompiler_enterRecompilerCode = (void ATTR_MS_ABI (*)(uint64,uint64))executableMemory; // the stub is called with the MS ABI and two 64-bit arguments
+}
+
+
+// Builds a small stub that recompiled code uses to leave the recompiler and returns its address.
+// The stub stores EDX into hCPU->instructionPointer, restores the host stack pointer that was
+// saved in hCPU->rspTemp and executes RET. Every call emits a separate copy of the stub.
+void* PPCRecompilerX64Gen_generateLeaveRecompilerCode()
+{
+	x64GenContext_t genCtx{};
+
+	// update instruction pointer (LR is in EDX)
+	x64Emit_mov_mem32_reg32(&genCtx, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer), X86_REG_EDX);
+	// MOV RSP, [hCPU->rspTemp]
+	x64Emit_mov_reg64_mem64(&genCtx, X86_REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp));
+	// RET
+	x64Gen_ret(&genCtx);
+
+	// move the emitted bytes into executable memory
+	const auto emittedCode = genCtx.emitter->GetBuffer();
+	uint8* stubMemory = PPCRecompilerX86_allocateExecutableMemory(emittedCode.size_bytes());
+	memcpy(stubMemory, emittedCode.data(), emittedCode.size_bytes());
+	return stubMemory;
+}
+
+// Generates the entry stub and two separate copies of the leave stub ("unvisited" and
+// "visited") and publishes them through the corresponding global function pointers.
+// The two leave stubs must live at different addresses, which is checked in debug builds.
+void PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions()
+{
+	PPCRecompilerX64Gen_generateEnterRecompilerCode();
+
+	void* const unvisitedStub = PPCRecompilerX64Gen_generateLeaveRecompilerCode();
+	void* const visitedStub = PPCRecompilerX64Gen_generateLeaveRecompilerCode();
+	PPCRecompiler_leaveRecompilerCode_unvisited = (void ATTR_MS_ABI (*)())unvisitedStub;
+	PPCRecompiler_leaveRecompilerCode_visited = (void ATTR_MS_ABI (*)())visitedStub;
+
+	cemu_assert_debug(unvisitedStub != visitedStub);
+}
+
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h
similarity index 81%
rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h
rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h
index 1d37a77e..de415ca9 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h
@@ -1,104 +1,56 @@
-typedef struct
+#include "../PPCRecompiler.h" // todo - get rid of dependency
+
+#include "x86Emitter.h"
+
+struct x64RelocEntry_t // one jump instruction whose relative target is patched after all code has been emitted
{
+ x64RelocEntry_t(uint32 offset, void* extraInfo) : offset(offset), extraInfo(extraInfo) {}; // stores both values unchanged
+
uint32 offset; // byte offset of the jump instruction inside the emitter buffer
- uint8 type;
void* extraInfo; // PPCRecompiler_generateX64Code casts this to the IMLSegment* the jump should land on
-}x64RelocEntry_t;
+};
-typedef struct
+// State of the x64 backend while code for one function (or stub) is being emitted.
+// Owns the assembler that buffers the machine code, tracks which IML instruction is currently
+// being translated and collects the jump relocations that are resolved after emission.
+struct x64GenContext_t
{
- uint8* codeBuffer;
- sint32 codeBufferIndex;
- sint32 codeBufferSize;
- // cr state
- sint32 activeCRRegister; // current x86 condition flags reflect this cr* register
- sint32 activeCRState; // describes the way in which x86 flags map to the cr register (signed / unsigned)
+	IMLSegment* currentSegment{}; // segment whose instructions are currently being translated
+	x86Assembler64* emitter{}; // owned; created in the constructor, destroyed in the destructor
+	sint32 m_currentInstructionEmitIndex{}; // index into currentSegment->imlList of the instruction being translated
+
+	x64GenContext_t()
+	{
+		emitter = new x86Assembler64();
+	}
+
+	~x64GenContext_t()
+	{
+		delete emitter;
+	}
+
+	// emitter is owned through a raw pointer, a copy would delete it twice
+	x64GenContext_t(const x64GenContext_t&) = delete;
+	x64GenContext_t& operator=(const x64GenContext_t&) = delete;
+
+	// Returns the instruction `relativeIndex` positions away from the current one within
+	// currentSegment, or nullptr if that position lies outside the segment.
+	IMLInstruction* GetNextInstruction(sint32 relativeIndex = 1)
+	{
+		sint32 index = m_currentInstructionEmitIndex + relativeIndex;
+		if(index < 0 || index >= (sint32)currentSegment->imlList.size())
+			return nullptr;
+		return currentSegment->imlList.data() + index;
+	}
+
// relocate offsets
- x64RelocEntry_t* relocateOffsetTable;
- sint32 relocateOffsetTableSize;
- sint32 relocateOffsetTableCount;
-}x64GenContext_t;
-
-// Some of these are defined by winnt.h and gnu headers
-#undef REG_EAX
-#undef REG_ECX
-#undef REG_EDX
-#undef REG_EBX
-#undef REG_ESP
-#undef REG_EBP
-#undef REG_ESI
-#undef REG_EDI
-#undef REG_NONE
-#undef REG_RAX
-#undef REG_RCX
-#undef REG_RDX
-#undef REG_RBX
-#undef REG_RSP
-#undef REG_RBP
-#undef REG_RSI
-#undef REG_RDI
-#undef REG_R8
-#undef REG_R9
-#undef REG_R10
-#undef REG_R11
-#undef REG_R12
-#undef REG_R13
-#undef REG_R14
-#undef REG_R15
-
-#define REG_EAX 0
-#define REG_ECX 1
-#define REG_EDX 2
-#define REG_EBX 3
-#define REG_ESP 4 // reserved for low half of hCPU pointer
-#define REG_EBP 5
-#define REG_ESI 6
-#define REG_EDI 7
-#define REG_NONE -1
-
-#define REG_RAX 0
-#define REG_RCX 1
-#define REG_RDX 2
-#define REG_RBX 3
-#define REG_RSP 4 // reserved for hCPU pointer
-#define REG_RBP 5
-#define REG_RSI 6
-#define REG_RDI 7
-#define REG_R8 8
-#define REG_R9 9
-#define REG_R10 10
-#define REG_R11 11
-#define REG_R12 12
-#define REG_R13 13 // reserved to hold pointer to memory base? (Not decided yet)
-#define REG_R14 14 // reserved as temporary register
-#define REG_R15 15 // reserved for pointer to ppcRecompilerInstanceData
-
-#define REG_AL 0
-#define REG_CL 1
-#define REG_DL 2
-#define REG_BL 3
-#define REG_AH 4
-#define REG_CH 5
-#define REG_DH 6
-#define REG_BH 7
+	std::vector<x64RelocEntry_t> relocateOffsetTable2; // jumps recorded during emission, patched in PPCRecompiler_generateX64Code
+};
// reserved registers
-#define REG_RESV_TEMP (REG_R14)
-#define REG_RESV_HCPU (REG_RSP)
-#define REG_RESV_MEMBASE (REG_R13)
-#define REG_RESV_RECDATA (REG_R15)
+#define REG_RESV_TEMP (X86_REG_R14)
+#define REG_RESV_HCPU (X86_REG_RSP)
+#define REG_RESV_MEMBASE (X86_REG_R13)
+#define REG_RESV_RECDATA (X86_REG_R15)
// reserved floating-point registers
#define REG_RESV_FPR_TEMP (15)
+#define reg32ToReg16(__x) (__x) // deprecated
-extern sint32 x64Gen_registerMap[12];
-
-#define tempToRealRegister(__x) (x64Gen_registerMap[__x])
-#define tempToRealFPRRegister(__x) (__x)
-#define reg32ToReg16(__x) (__x)
-
+// deprecated condition flags
enum
{
X86_CONDITION_EQUAL, // or zero
@@ -119,36 +71,23 @@ enum
X86_CONDITION_NONE, // no condition, jump always
};
-#define PPCREC_CR_TEMPORARY (8) // never stored
-#define PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC (0) // for signed arithmetic operations (ADD, CMPI)
-#define PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC (1) // for unsigned arithmetic operations (ADD, CMPI)
-#define PPCREC_CR_STATE_TYPE_LOGICAL (2) // for unsigned operations (CMPLI)
-
-#define X86_RELOC_MAKE_RELATIVE (0) // make code imm relative to instruction
-#define X64_RELOC_LINK_TO_PPC (1) // translate from ppc address to x86 offset
-#define X64_RELOC_LINK_TO_SEGMENT (2) // link to beginning of segment
-
-#define PPC_X64_GPR_USABLE_REGISTERS (16-4)
-#define PPC_X64_FPR_USABLE_REGISTERS (16-1) // Use XMM0 - XMM14, XMM15 is the temp register
-
-
-bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext);
-
-void PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext);
+bool PPCRecompiler_generateX64Code(struct PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset);
void PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions();
-void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
-void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
-bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed);
-bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed);
+void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
+void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
+bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed);
+bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed);
-void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
-void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
-void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
-void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
+void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
+void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
+void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
+void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
+
+void PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
// ASM gen
void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v);
@@ -196,9 +135,6 @@ void x64Gen_or_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstReg
void x64Gen_and_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32);
void x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32);
-void x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
-void x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegister64, sint32 memImmS32, sint32 srcRegister);
-
void x64Gen_add_reg64_reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_add_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_add_reg64_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
@@ -207,9 +143,6 @@ void x64Gen_sub_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 des
void x64Gen_sub_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_sub_reg64_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_sub_mem32reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegister, sint32 memImmS32, uint64 immU32);
-void x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
-void x64Gen_adc_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
-void x64Gen_adc_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_dec_mem32(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint32 memoryImmU32);
void x64Gen_imul_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 operandRegister);
void x64Gen_idiv_reg64Low32(x64GenContext_t* x64GenContext, sint32 operandRegister);
@@ -241,9 +174,7 @@ void x64Gen_not_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister);
void x64Gen_neg_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister);
void x64Gen_cdq(x64GenContext_t* x64GenContext);
-void x64Gen_bswap_reg64(x64GenContext_t* x64GenContext, sint32 destRegister);
void x64Gen_bswap_reg64Lower32bit(x64GenContext_t* x64GenContext, sint32 destRegister);
-void x64Gen_bswap_reg64Lower16bit(x64GenContext_t* x64GenContext, sint32 destRegister);
void x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
@@ -274,6 +205,7 @@ void x64Gen_movddup_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi
void x64Gen_movhlps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_movsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
+void x64Gen_movsd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
@@ -299,6 +231,7 @@ void x64Gen_andps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegist
void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32);
void x64Gen_cvttpd2dq_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc);
+void x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc);
void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
@@ -329,4 +262,8 @@ void x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext_t* x64G
void x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
-void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
\ No newline at end of file
+void x64Gen_shrx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
+void x64Gen_sarx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
+void x64Gen_sarx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
+void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
+void x64Gen_shlx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64AVX.cpp
similarity index 92%
rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp
rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64AVX.cpp
index 619c3985..b0ef8640 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64AVX.cpp
@@ -1,5 +1,4 @@
-#include "PPCRecompiler.h"
-#include "PPCRecompilerX64.h"
+#include "BackendX64.h"
void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
@@ -21,11 +20,10 @@ void _x64Gen_vex128_nds(x64GenContext_t* x64GenContext, uint8 opcodeMap, uint8 a
x64Gen_writeU8(x64GenContext, opcode);
}
-#define VEX_PP_0F 0 // guessed
+#define VEX_PP_0F 0
#define VEX_PP_66_0F 1
-#define VEX_PP_F3_0F 2 // guessed
-#define VEX_PP_F2_0F 3 // guessed
-
+#define VEX_PP_F3_0F 2
+#define VEX_PP_F2_0F 3
void x64Gen_avx_VPUNPCKHQDQ_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB)
{
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp
similarity index 67%
rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp
rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp
index 5a71e93d..bbb707e0 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp
@@ -1,5 +1,4 @@
-#include "PPCRecompiler.h"
-#include "PPCRecompilerX64.h"
+#include "BackendX64.h"
void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
@@ -69,6 +68,34 @@ void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 regist
x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7));
}
+void x64Gen_shrx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
+{
+ // SHRX reg32, reg32, reg32
+ const sint32 prefixByte1 = 0xE2 - ((registerDst < 8) ? 0 : 0x80) - ((registerA < 8) ? 0 : 0x20); // loses 0x80 / 0x20 when registerDst / registerA is 8 or above
+ const sint32 modRM = 0xC0 + (registerDst & 7) * 8 + (registerA & 7); // low three bits of registerDst and registerA
+ const sint32 encoding[] = { 0xC4, prefixByte1, 0x7B - registerB * 8, 0xF7, modRM }; // registerB is folded into the third byte
+ for (const sint32 encodedByte : encoding) x64Gen_writeU8(x64GenContext, encodedByte);
+}
+
+void x64Gen_sarx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
+{
+ // SARX reg64, reg64, reg64
+ // five bytes are emitted in order: 0xC4, second prefix byte, third prefix byte (carries registerB), opcode 0xF7, ModRM
+ const sint32 prefixByte1 = 0xE2 - ((registerDst < 8) ? 0 : 0x80) - ((registerA < 8) ? 0 : 0x20); // loses 0x80 / 0x20 when registerDst / registerA is 8 or above
+ const sint32 modRM = 0xC0 + (registerDst & 7) * 8 + (registerA & 7); // low three bits of registerDst and registerA
+ const sint32 encoding[] = { 0xC4, prefixByte1, 0xFA - registerB * 8, 0xF7, modRM };
+ for (const sint32 encodedByte : encoding) x64Gen_writeU8(x64GenContext, encodedByte);
+}
+
+void x64Gen_sarx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
+{
+ // SARX reg32, reg32, reg32
+ const sint32 prefixByte1 = 0xE2 - ((registerDst < 8) ? 0 : 0x80) - ((registerA < 8) ? 0 : 0x20); // loses 0x80 / 0x20 when registerDst / registerA is 8 or above
+ const sint32 modRM = 0xC0 + (registerDst & 7) * 8 + (registerA & 7); // low three bits of registerDst and registerA
+ const sint32 encoding[] = { 0xC4, prefixByte1, 0x7A - registerB * 8, 0xF7, modRM }; // registerB is folded into the third byte
+ for (const sint32 encodedByte : encoding) x64Gen_writeU8(x64GenContext, encodedByte);
+}
+
void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
{
// SHLX reg64, reg64, reg64
@@ -77,4 +104,13 @@ void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 regist
x64Gen_writeU8(x64GenContext, 0xF9 - registerB * 8);
x64Gen_writeU8(x64GenContext, 0xF7);
x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7));
+}
+
+void x64Gen_shlx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
+{
+ // SHLX reg32, reg32, reg32
+ const sint32 prefixByte1 = 0xE2 - ((registerDst < 8) ? 0 : 0x80) - ((registerA < 8) ? 0 : 0x20); // loses 0x80 / 0x20 when registerDst / registerA is 8 or above
+ const sint32 modRM = 0xC0 + (registerDst & 7) * 8 + (registerA & 7); // low three bits of registerDst and registerA
+ const sint32 encoding[] = { 0xC4, prefixByte1, 0x79 - registerB * 8, 0xF7, modRM }; // registerB is folded into the third byte
+ for (const sint32 encodedByte : encoding) x64Gen_writeU8(x64GenContext, encodedByte);
+}
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp
new file mode 100644
index 00000000..6a8b1b97
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp
@@ -0,0 +1,469 @@
+#include "../PPCRecompiler.h"
+#include "../IML/IML.h"
+#include "BackendX64.h"
+#include "Common/cpu_features.h"
+
+uint32 _regF64(IMLReg physReg);
+
+uint32 _regI32(IMLReg r)
+{
+ cemu_assert_debug(IMLRegFormat::I32 == r.GetRegFormat()); // debug check: only registers in I32 format are expected here
+ return static_cast<uint32>(r.GetRegID()); // hand back the register id as an unsigned index
+}
+
+static x86Assembler64::GPR32 _reg32(sint8 physRegId)
+{
+ return static_cast<x86Assembler64::GPR32>(physRegId); // plain value conversion of a physical register id to the emitter's GPR32 type
+}
+
+static x86Assembler64::GPR8_REX _reg8(IMLReg r)
+{
+ cemu_assert_debug(IMLRegFormat::I32 == r.GetRegFormat()); // bool registers are currently implemented as 32bit registers
+ return static_cast<x86Assembler64::GPR8_REX>(r.GetRegID()); // same register id, typed as an 8bit operand
+}
+
+static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId)
+{
+ return static_cast<x86Assembler64::GPR32>(regId); // same register id, retyped from 8bit to 32bit operand
+}
+
+static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId)
+{
+ return static_cast<x86Assembler64::GPR8_REX>(regId); // same register id, retyped from 32bit to 8bit operand
+}
+
+// load from memory: emits x64 code for an IML FPR load (single or double mode, optionally indexed). Returns false if the mode is not handled here
+bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
+{
+ sint32 realRegisterXMM = _regF64(imlInstruction->op_storeLoad.registerData); // XMM register that receives the loaded value
+ sint32 realRegisterMem = _regI32(imlInstruction->op_storeLoad.registerMem); // GPR used as address operand
+ sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; // second address GPR, only resolved for indexed loads
+ if( indexed )
+ realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2);
+ uint8 mode = imlInstruction->op_storeLoad.mode;
+
+ if( mode == PPCREC_FPR_LD_MODE_SINGLE )
+ {
+ // load the 32bit value into REG_RESV_TEMP; it is byte swapped either by the MOVBE load or by the BSWAP further below
+ if( indexed )
+ {
+ x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2); // REG_RESV_TEMP = registerMem2
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem); // REG_RESV_TEMP += registerMem
+ if(g_CPUFeatures.x86.movbe)
+ x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
+ else
+ x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
+ }
+ else
+ {
+ if(g_CPUFeatures.x86.movbe)
+ x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
+ else
+ x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
+ }
+ if(g_CPUFeatures.x86.movbe == false ) // the plain MOV path still needs an explicit byte swap
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
+ x64Gen_movd_xmmReg_reg64Low32(x64GenContext, realRegisterXMM, REG_RESV_TEMP); // move the swapped 32bit value into the target XMM register
+
+ if (imlInstruction->op_storeLoad.flags2.notExpanded)
+ {
+ // leave value as single
+ }
+ else
+ {
+ x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, realRegisterXMM); // expand the loaded single to double in place
+ }
+ }
+ else if( mode == PPCREC_FPR_LD_MODE_DOUBLE )
+ {
+ if( g_CPUFeatures.x86.avx ) // NOTE(review): the emitters used in this branch do not look AVX-specific by name - confirm why it is gated on AVX
+ {
+ if( indexed )
+ {
+ // calculate offset
+ x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
+ // load value
+ x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0);
+ x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP); // swap all 8 bytes at once
+ x64Gen_movq_xmmReg_reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP);
+ x64Gen_movsd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_FPR_TEMP); // goes through REG_RESV_FPR_TEMP before reaching the target register
+ }
+ else
+ {
+ x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+0);
+ x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP); // swap all 8 bytes at once
+ x64Gen_movq_xmmReg_reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP);
+ x64Gen_movsd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_FPR_TEMP); // goes through REG_RESV_FPR_TEMP before reaching the target register
+ }
+ }
+ else
+ {
+ if( indexed )
+ {
+ // calculate offset
+ x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
+ // the 32bit word at offset +0 is byte swapped and written to temporaryFPR+4
+ x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0);
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
+ x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP);
+ // calculate offset again (REG_RESV_TEMP was overwritten by the load above)
+ x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
+ // the 32bit word at offset +4 is byte swapped and written to temporaryFPR+0
+ x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+4);
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
+ x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP);
+ // load double from temporaryFPR
+ x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR));
+ }
+ else
+ {
+ // the 32bit word at offset +0 is byte swapped and written to temporaryFPR+4
+ x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+0);
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
+ x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP);
+ // the 32bit word at offset +4 is byte swapped and written to temporaryFPR+0
+ x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+4);
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
+ x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP);
+ // load double from temporaryFPR
+ x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR));
+ }
+ }
+ }
+ else
+ {
+ return false; // any other load mode is not handled by this function
+ }
+ return true;
+}
+
+// store to memory: emits x64 code for an IML FPR store (single, double or raw 32bit from PS0, optionally indexed). Returns false for unsupported modes
+bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
+{
+ sint32 realRegisterXMM = _regF64(imlInstruction->op_storeLoad.registerData); // XMM register holding the value to store
+ sint32 realRegisterMem = _regI32(imlInstruction->op_storeLoad.registerMem); // GPR used as address operand
+ sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; // second address GPR, only resolved for indexed stores
+ if( indexed )
+ realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2);
+ uint8 mode = imlInstruction->op_storeLoad.mode;
+ if( mode == PPCREC_FPR_ST_MODE_SINGLE )
+ {
+ if (imlInstruction->op_storeLoad.flags2.notExpanded)
+ {
+ // value is already in single format
+ x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM);
+ }
+ else
+ {
+ x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, realRegisterXMM); // convert into the temp FPR so the source register stays untouched
+ x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP);
+ }
+ if(g_CPUFeatures.x86.movbe == false ) // with MOVBE the swap happens as part of the store below
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
+ if( indexed )
+ {
+ if( realRegisterMem == realRegisterMem2 ) // the add/sub pair below would zero the register if both operands were the same
+ assert_dbg();
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // temporarily fold the index into registerMem
+ }
+ if(g_CPUFeatures.x86.movbe)
+ x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
+ else
+ x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
+ if( indexed )
+ {
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // restore registerMem
+ }
+ }
+ else if( mode == PPCREC_FPR_ST_MODE_DOUBLE )
+ {
+ if( indexed )
+ {
+ if( realRegisterMem == realRegisterMem2 ) // the add/sub pair below would zero the register if both operands were the same
+ assert_dbg();
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // temporarily fold the index into registerMem
+ }
+ x64Gen_movsd_memReg64_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)); // spill the double to temporaryFPR so it can be read back as two 32bit halves
+ // store double low part
+ x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+0);
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
+ x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+4, REG_RESV_TEMP);
+ // store double high part
+ x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+4);
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
+ x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+0, REG_RESV_TEMP);
+ if( indexed )
+ {
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // restore registerMem
+ }
+ }
+ else if( mode == PPCREC_FPR_ST_MODE_UI32_FROM_PS0 )
+ {
+ x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM); // raw low 32 bits of the XMM register, no float conversion
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
+ if( indexed )
+ {
+ cemu_assert_debug(realRegisterMem != realRegisterMem2); // must differ: with identical registers the sub below would leave registerMem at zero
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // temporarily fold the index into registerMem
+ x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // restore registerMem
+ }
+ else
+ {
+ x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
+ }
+ }
+ else
+ {
+ debug_printf("PPCRecompilerX64Gen_imlInstruction_fpr_store(): Unsupported mode %d\n", mode);
+ return false;
+ }
+ return true;
+}
+
+// FPR op FPR: emits code for two-operand FPR instructions. The three conversion/bitcast operations mix a GPR and an FPR and are handled first
+void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+ if( imlInstruction->operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT )
+ {
+ uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regR); // result is an integer register
+ uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regA);
+ x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, regGpr, regFpr);
+ return;
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT )
+ {
+ uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regR);
+ uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regA); // source is an integer register
+ x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext, regFpr, regGpr); // NOTE(review): emitter name says xmmReg_xmmReg but the source passed here is a GPR id - confirm
+ return;
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
+ {
+ cemu_assert_debug(imlInstruction->op_fpr_r_r.regR.GetRegFormat() == IMLRegFormat::F64); // assuming target is always F64 for now
+ cemu_assert_debug(imlInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::I32); // supporting only 32bit floats as input for now
+ // exact operation depends on size of types. Floats are automatically promoted to double if the target is F64
+ uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regR);
+ if (imlInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::I32)
+ {
+ uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regA);
+ x64Gen_movq_xmmReg_reg64(x64GenContext, regFpr, regGpr); // using reg32 as reg64 param here is ok. We'll refactor later
+ // float to double
+ x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regFpr, regFpr);
+ }
+ else
+ {
+ cemu_assert_unimplemented();
+ }
+ return;
+ }
+
+ uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR); // from here on both operands are FPRs; regR is destination and first operand
+ uint32 regA = _regF64(imlInstruction->op_fpr_r_r.regA);
+ if( imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN )
+ {
+ x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA);
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY )
+ {
+ x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regA);
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE )
+ {
+ x64Gen_divsd_xmmReg_xmmReg(x64GenContext, regR, regA);
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD )
+ {
+ x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regA);
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB )
+ {
+ x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regA);
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_FCTIWZ )
+ {
+ x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, regA); // integer result lands in the temporary GPR
+ x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); // 32bit self-move; presumably clears the upper half before the 64bit move below - confirm
+ // move to FPR register
+ x64Gen_movq_xmmReg_reg64(x64GenContext, regR, REG_RESV_TEMP);
+ }
+ else
+ {
+ assert_dbg(); // unknown operation
+ }
+}
+
+/*
+ * FPR = op (fprA, fprB): emits MULTIPLY, ADD or SUB with a separate destination, picking the instruction sequence based on register aliasing
+ */
+void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+ uint32 regR = _regF64(imlInstruction->op_fpr_r_r_r.regR);
+ uint32 regA = _regF64(imlInstruction->op_fpr_r_r_r.regA);
+ uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r.regB);
+
+ if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY)
+ {
+ if (regR == regA)
+ {
+ x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regB); // destination already holds A
+ }
+ else if (regR == regB)
+ {
+ x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regA); // destination holds B; operands are simply swapped
+ }
+ else
+ {
+ x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA); // copy A into the destination first
+ x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regB);
+ }
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD)
+ {
+ // todo: Use AVX 3-operand VADDSD if available
+ if (regR == regA)
+ {
+ x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regB); // destination already holds A
+ }
+ else if (regR == regB)
+ {
+ x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regA); // destination holds B; operands are simply swapped
+ }
+ else
+ {
+ x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA); // NOTE(review): uses movaps here while MULTIPLY/SUB copy with movsd - confirm this difference is intended
+ x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regB);
+ }
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB )
+ {
+ if( regR == regA )
+ {
+ x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regB); // destination already holds A
+ }
+ else if( regR == regB )
+ {
+ x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); // operands cannot be swapped for SUB, so compute A - B in the temp FPR
+ x64Gen_subsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB);
+ x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
+ }
+ else
+ {
+ x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA); // copy A into the destination first
+ x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regB);
+ }
+ }
+ else
+ assert_dbg(); // unknown operation
+}
+
+/*
+ * FPR = op (fprA, fprB, fprC): only SELECT is implemented, it copies fprB or fprC into the result depending on a compare of fprA against _x64XMM_constDouble0_0
+ */
+void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+ uint32 regR = _regF64(imlInstruction->op_fpr_r_r_r_r.regR);
+ uint32 regA = _regF64(imlInstruction->op_fpr_r_r_r_r.regA);
+ uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r_r.regB);
+ uint32 regC = _regF64(imlInstruction->op_fpr_r_r_r_r.regC);
+
+ if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT )
+ {
+ x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
+ sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); // remember where the first jump is written so its target can be patched later
+ x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); // taken on the "below" condition -> goes to the select B path; target 0 is a placeholder
+ // select C
+ x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regC);
+ sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex();
+ x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); // skips the select B path; presumably X86_CONDITION_NONE means unconditional - confirm
+ // select B
+ PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
+ x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regB);
+ // end
+ PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
+ }
+ else
+ assert_dbg(); // unknown operation
+}
+
+void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{ // emits code for single-operand FPR operations that modify regR in place
+ uint32 regR = _regF64(imlInstruction->op_fpr_r.regR);
+
+ if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE )
+ {
+ x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); // XOR with the negate mask constant
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_LOAD_ONE )
+ {
+ x64Gen_movsd_xmmReg_memReg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble1_1)); // load from the _x64XMM_constDouble1_1 constant
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS )
+ {
+ x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom)); // AND with the abs mask constant
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS )
+ {
+ x64Gen_orps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); // OR (not XOR) with the negate mask constant
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM )
+ {
+ // convert to 32bit single
+ x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, regR, regR);
+ // convert back to 64bit double
+ x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64)
+ {
+ // convert bottom to 64bit double
+ x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR);
+ }
+ else
+ {
+ cemu_assert_unimplemented(); // any other operation is not supported here
+ }
+}
+
+void PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{ // emits code that compares regA with regB (UCOMISD) and writes the outcome of op_fpr_compare.cond into regR via SETcc
+ auto regR = _reg8(imlInstruction->op_fpr_compare.regR);
+ auto regA = _regF64(imlInstruction->op_fpr_compare.regA);
+ auto regB = _regF64(imlInstruction->op_fpr_compare.regB);
+
+ x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // clear the full 32bit result register before the compare; the SETcc_b calls below operate on the byte register
+ x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, regA, regB);
+
+ if (imlInstruction->op_fpr_compare.cond == IMLCondition::UNORDERED_GT)
+ {
+ // GT case can be covered with a single SETnbe which checks CF==0 && ZF==0 (unordered sets both)
+ x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_NBE, regR);
+ return;
+ }
+ else if (imlInstruction->op_fpr_compare.cond == IMLCondition::UNORDERED_U)
+ {
+ // unordered case can be checked via PF
+ x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_PE, regR);
+ return;
+ }
+
+ // remember unordered state
+ auto regTmp = _reg8_from_reg32(_reg32(REG_RESV_TEMP)); // REG_RESV_TEMP as 8bit operand
+ x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_PO, regTmp); // by reversing the parity we can avoid having to XOR the value for masking the LT/EQ conditions
+
+ // set regR from the raw flag; only LT and EQ are implemented here (GT and U returned above)
+ switch (imlInstruction->op_fpr_compare.cond)
+ {
+ case IMLCondition::UNORDERED_LT:
+ x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_B, regR);
+ break;
+ case IMLCondition::UNORDERED_EQ:
+ x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_Z, regR);
+ break;
+ default:
+ cemu_assert_unimplemented();
+ }
+ x64GenContext->emitter->AND_bb(regR, regTmp); // if unordered (PF=1) then regTmp is 0, which forces the LT/EQ result to zero
+}
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp
similarity index 90%
rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp
rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp
index 19327f46..efe929d0 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp
@@ -1,62 +1,31 @@
-#include "PPCRecompiler.h"
-#include "PPCRecompilerIml.h"
-#include "PPCRecompilerX64.h"
+#include "BackendX64.h"
// x86/x64 extension opcodes that could be useful:
// ANDN
// mulx, rorx, sarx, shlx, shrx
// PDEP, PEXT
-void x64Gen_checkBuffer(x64GenContext_t* x64GenContext)
-{
- // todo
-}
-
void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v)
{
- if( x64GenContext->codeBufferIndex+1 > x64GenContext->codeBufferSize )
- {
- x64GenContext->codeBufferSize *= 2;
- x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize);
- }
- *(uint8*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v;
- x64GenContext->codeBufferIndex++;
+ x64GenContext->emitter->_emitU8(v); // forward the byte to the emitter object; replaces the manual buffer growth and store removed above
}
void x64Gen_writeU16(x64GenContext_t* x64GenContext, uint32 v)
{
- if( x64GenContext->codeBufferIndex+2 > x64GenContext->codeBufferSize )
- {
- x64GenContext->codeBufferSize *= 2;
- x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize);
- }
- *(uint16*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v;
- x64GenContext->codeBufferIndex += 2;
+ x64GenContext->emitter->_emitU16(v); // forward to the emitter object; v is declared uint32, presumably narrowed to 16 bits by _emitU16 - confirm
}
void x64Gen_writeU32(x64GenContext_t* x64GenContext, uint32 v)
{
- if( x64GenContext->codeBufferIndex+4 > x64GenContext->codeBufferSize )
- {
- x64GenContext->codeBufferSize *= 2;
- x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize);
- }
- *(uint32*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v;
- x64GenContext->codeBufferIndex += 4;
+ x64GenContext->emitter->_emitU32(v); // forward the 32bit value to the emitter object; replaces the manual buffer growth and store removed above
}
void x64Gen_writeU64(x64GenContext_t* x64GenContext, uint64 v)
{
- if( x64GenContext->codeBufferIndex+8 > x64GenContext->codeBufferSize )
- {
- x64GenContext->codeBufferSize *= 2;
- x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize);
- }
- *(uint64*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v;
- x64GenContext->codeBufferIndex += 8;
+ x64GenContext->emitter->_emitU64(v); // forward the 64bit value to the emitter object; replaces the manual buffer growth and store removed above
}
-#include "x64Emit.hpp"
+#include "X64Emit.hpp"
void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32)
{
@@ -67,7 +36,7 @@ void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataReg
forceUseOffset = true;
}
- if (memRegisterB64 == REG_NONE)
+ if (memRegisterB64 == X86_REG_NONE)
{
// memRegisterA64 + memImmS32
uint8 modRM = (dataRegister & 7) * 8 + (memRegisterA64 & 7);
@@ -352,7 +321,7 @@ void x64Gen_mov_mem32Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis
void x64Gen_mov_mem64Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 memImmU32, uint32 dataImmU32)
{
// MOV QWORD [+], dataImmU32
- if( memRegister == REG_R14 )
+ if( memRegister == X86_REG_R14 )
{
sint32 memImmS32 = (sint32)memImmU32;
if( memImmS32 == 0 )
@@ -384,7 +353,7 @@ void x64Gen_mov_mem64Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis
void x64Gen_mov_mem8Reg64_imm8(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 memImmU32, uint8 dataImmU8)
{
// MOV BYTE [+], dataImmU8
- if( memRegister == REG_RSP )
+ if( memRegister == X86_REG_RSP )
{
sint32 memImmS32 = (sint32)memImmU32;
if( memImmS32 >= -128 && memImmS32 <= 127 )
@@ -625,7 +594,7 @@ void _x64_op_reg64Low_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegist
if (memRegister64 >= 8)
x64Gen_writeU8(x64GenContext, 0x41);
x64Gen_writeU8(x64GenContext, opByte);
- _x64Gen_writeMODRMDeprecated(x64GenContext, dstRegister, memRegister64, REG_NONE, memImmS32);
+ _x64Gen_writeMODRMDeprecated(x64GenContext, dstRegister, memRegister64, X86_REG_NONE, memImmS32);
}
void x64Gen_or_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32)
@@ -643,40 +612,6 @@ void x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext_t* x64GenContext, sint32 dstRe
_x64_op_reg64Low_mem8Reg64(x64GenContext, dstRegister, memRegister64, memImmS32, 0x88);
}
-void x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister)
-{
- // LOCK CMPXCHG DWORD [ + + ], (low dword)
- x64Gen_writeU8(x64GenContext, 0xF0); // LOCK prefix
-
- if( srcRegister >= 8 || memRegisterA64 >= 8|| memRegisterB64 >= 8 )
- x64Gen_writeU8(x64GenContext, 0x40+((srcRegister>=8)?4:0)+((memRegisterA64>=8)?1:0)+((memRegisterB64>=8)?2:0));
-
- x64Gen_writeU8(x64GenContext, 0x0F);
- x64Gen_writeU8(x64GenContext, 0xB1);
-
- _x64Gen_writeMODRMDeprecated(x64GenContext, srcRegister, memRegisterA64, memRegisterB64, memImmS32);
-}
-
-void x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegister64, sint32 memImmS32, sint32 srcRegister)
-{
- // LOCK CMPXCHG DWORD [ + ], (low dword)
- x64Gen_writeU8(x64GenContext, 0xF0); // LOCK prefix
-
- if( srcRegister >= 8 || memRegister64 >= 8 )
- x64Gen_writeU8(x64GenContext, 0x40+((srcRegister>=8)?4:0)+((memRegister64>=8)?1:0));
-
- x64Gen_writeU8(x64GenContext, 0x0F);
- x64Gen_writeU8(x64GenContext, 0xB1);
-
- if( memImmS32 == 0 )
- {
- x64Gen_writeU8(x64GenContext, 0x45+(srcRegister&7)*8);
- x64Gen_writeU8(x64GenContext, 0x00);
- }
- else
- assert_dbg();
-}
-
void x64Gen_add_reg64_reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister)
{
// ADD ,
@@ -732,7 +667,7 @@ void x64Gen_add_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis
}
else
{
- if( srcRegister == REG_RAX )
+ if( srcRegister == X86_REG_RAX )
{
// special EAX short form
x64Gen_writeU8(x64GenContext, 0x05);
@@ -772,7 +707,7 @@ void x64Gen_sub_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis
}
else
{
- if( srcRegister == REG_RAX )
+ if( srcRegister == X86_REG_RAX )
{
// special EAX short form
x64Gen_writeU8(x64GenContext, 0x2D);
@@ -811,7 +746,7 @@ void x64Gen_sub_mem32reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis
{
// SUB ,
sint32 immS32 = (sint32)immU32;
- if( memRegister == REG_RSP )
+ if( memRegister == X86_REG_RSP )
{
if( memImmS32 >= 128 )
{
@@ -843,64 +778,11 @@ void x64Gen_sub_mem32reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis
}
}
-void x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister)
-{
- // SBB ,
- if( destRegister >= 8 && srcRegister >= 8 )
- x64Gen_writeU8(x64GenContext, 0x45);
- else if( srcRegister >= 8 )
- x64Gen_writeU8(x64GenContext, 0x44);
- else if( destRegister >= 8 )
- x64Gen_writeU8(x64GenContext, 0x41);
- x64Gen_writeU8(x64GenContext, 0x19);
- x64Gen_writeU8(x64GenContext, 0xC0+(srcRegister&7)*8+(destRegister&7));
-}
-
-void x64Gen_adc_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister)
-{
- // ADC ,
- if( destRegister >= 8 && srcRegister >= 8 )
- x64Gen_writeU8(x64GenContext, 0x45);
- else if( srcRegister >= 8 )
- x64Gen_writeU8(x64GenContext, 0x44);
- else if( destRegister >= 8 )
- x64Gen_writeU8(x64GenContext, 0x41);
- x64Gen_writeU8(x64GenContext, 0x11);
- x64Gen_writeU8(x64GenContext, 0xC0+(srcRegister&7)*8+(destRegister&7));
-}
-
-void x64Gen_adc_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32)
-{
- sint32 immS32 = (sint32)immU32;
- if( srcRegister >= 8 )
- x64Gen_writeU8(x64GenContext, 0x41);
- if( immS32 >= -128 && immS32 <= 127 )
- {
- x64Gen_writeU8(x64GenContext, 0x83);
- x64Gen_writeU8(x64GenContext, 0xD0+(srcRegister&7));
- x64Gen_writeU8(x64GenContext, (uint8)immS32);
- }
- else
- {
- if( srcRegister == REG_RAX )
- {
- // special EAX short form
- x64Gen_writeU8(x64GenContext, 0x15);
- }
- else
- {
- x64Gen_writeU8(x64GenContext, 0x81);
- x64Gen_writeU8(x64GenContext, 0xD0+(srcRegister&7));
- }
- x64Gen_writeU32(x64GenContext, immU32);
- }
-}
-
void x64Gen_dec_mem32(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint32 memoryImmU32)
{
// DEC dword [+imm]
sint32 memoryImmS32 = (sint32)memoryImmU32;
- if (memoryRegister != REG_RSP)
+ if (memoryRegister != X86_REG_RSP)
assert_dbg(); // not supported yet
if (memoryImmS32 >= -128 && memoryImmS32 <= 127)
{
@@ -981,7 +863,7 @@ void x64Gen_and_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis
}
else
{
- if( srcRegister == REG_RAX )
+ if( srcRegister == X86_REG_RAX )
{
// special EAX short form
x64Gen_writeU8(x64GenContext, 0x25);
@@ -1026,7 +908,7 @@ void x64Gen_test_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegi
sint32 immS32 = (sint32)immU32;
if( srcRegister >= 8 )
x64Gen_writeU8(x64GenContext, 0x41);
- if( srcRegister == REG_RAX )
+ if( srcRegister == X86_REG_RAX )
{
// special EAX short form
x64Gen_writeU8(x64GenContext, 0xA9);
@@ -1052,7 +934,7 @@ void x64Gen_cmp_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis
}
else
{
- if( srcRegister == REG_RAX )
+ if( srcRegister == X86_REG_RAX )
{
// special RAX short form
x64Gen_writeU8(x64GenContext, 0x3D);
@@ -1082,7 +964,7 @@ void x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 des
void x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 memRegister, sint32 memImmS32)
{
// CMP , DWORD [+]
- if( memRegister == REG_RSP )
+ if( memRegister == X86_REG_RSP )
{
if( memImmS32 >= -128 && memImmS32 <= 127 )
assert_dbg(); // todo -> Shorter instruction form
@@ -1112,7 +994,7 @@ void x64Gen_or_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegist
}
else
{
- if( srcRegister == REG_RAX )
+ if( srcRegister == X86_REG_RAX )
{
// special EAX short form
x64Gen_writeU8(x64GenContext, 0x0D);
@@ -1172,7 +1054,7 @@ void x64Gen_xor_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis
}
else
{
- if( srcRegister == REG_RAX )
+ if( srcRegister == X86_REG_RAX )
{
// special EAX short form
x64Gen_writeU8(x64GenContext, 0x35);
@@ -1326,16 +1208,6 @@ void x64Gen_cdq(x64GenContext_t* x64GenContext)
x64Gen_writeU8(x64GenContext, 0x99);
}
-void x64Gen_bswap_reg64(x64GenContext_t* x64GenContext, sint32 destRegister)
-{
- if( destRegister >= 8 )
- x64Gen_writeU8(x64GenContext, 0x41|8);
- else
- x64Gen_writeU8(x64GenContext, 0x40|8);
- x64Gen_writeU8(x64GenContext, 0x0F);
- x64Gen_writeU8(x64GenContext, 0xC8+(destRegister&7));
-}
-
void x64Gen_bswap_reg64Lower32bit(x64GenContext_t* x64GenContext, sint32 destRegister)
{
if( destRegister >= 8 )
@@ -1344,16 +1216,6 @@ void x64Gen_bswap_reg64Lower32bit(x64GenContext_t* x64GenContext, sint32 destReg
x64Gen_writeU8(x64GenContext, 0xC8+(destRegister&7));
}
-void x64Gen_bswap_reg64Lower16bit(x64GenContext_t* x64GenContext, sint32 destRegister)
-{
- assert_dbg(); // do not use this instruction, it's result is always undefined. Instead use ROL , 8
- //x64Gen_writeU8(x64GenContext, 0x66);
- //if( destRegister >= 8 )
- // x64Gen_writeU8(x64GenContext, 0x41);
- //x64Gen_writeU8(x64GenContext, 0x0F);
- //x64Gen_writeU8(x64GenContext, 0xC8+(destRegister&7));
-}
-
void x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister)
{
// SSE4
@@ -1388,7 +1250,7 @@ void x64Gen_setcc_mem8(x64GenContext_t* x64GenContext, sint32 conditionType, sin
{
// SETcc [+imm]
sint32 memoryImmS32 = (sint32)memoryImmU32;
- if( memoryRegister != REG_RSP )
+ if( memoryRegister != X86_REG_RSP )
assert_dbg(); // not supported
if( memoryRegister >= 8 )
assert_dbg(); // not supported
@@ -1627,7 +1489,7 @@ void x64Gen_bt_mem8(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint3
{
// BT [+imm], bitIndex (bit test)
sint32 memoryImmS32 = (sint32)memoryImmU32;
- if( memoryRegister != REG_RSP )
+ if( memoryRegister != X86_REG_RSP )
assert_dbg(); // not supported yet
if( memoryImmS32 >= -128 && memoryImmS32 <= 127 )
{
@@ -1662,7 +1524,7 @@ void x64Gen_jmp_imm32(x64GenContext_t* x64GenContext, uint32 destImm32)
void x64Gen_jmp_memReg64(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 immU32)
{
- if( memRegister == REG_NONE )
+ if( memRegister == X86_REG_NONE )
{
assert_dbg();
}
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp
similarity index 92%
rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp
rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp
index 92289d68..4bbcc025 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp
@@ -1,6 +1,4 @@
-#include "PPCRecompiler.h"
-#include "PPCRecompilerIml.h"
-#include "PPCRecompilerX64.h"
+#include "BackendX64.h"
void x64Gen_genSSEVEXPrefix2(x64GenContext_t* x64GenContext, sint32 xmmRegister1, sint32 xmmRegister2, bool use64BitMode)
{
@@ -44,7 +42,7 @@ void x64Gen_movupd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRe
// SSE2
// move two doubles from memory into xmm register
// MOVUPD , [+]
- if( memRegister == REG_ESP )
+ if( memRegister == X86_REG_ESP )
{
// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
// 66 0F 10 84 E4 23 01 00 00
@@ -56,7 +54,7 @@ void x64Gen_movupd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRe
x64Gen_writeU8(x64GenContext, 0xE4);
x64Gen_writeU32(x64GenContext, memImmU32);
}
- else if( memRegister == REG_NONE )
+ else if( memRegister == X86_REG_NONE )
{
assert_dbg();
//x64Gen_writeU8(x64GenContext, 0x66);
@@ -76,7 +74,7 @@ void x64Gen_movupd_memReg128_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRe
// SSE2
// move two doubles from memory into xmm register
// MOVUPD [+],
- if( memRegister == REG_ESP )
+ if( memRegister == X86_REG_ESP )
{
// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
x64Gen_writeU8(x64GenContext, 0x66);
@@ -87,7 +85,7 @@ void x64Gen_movupd_memReg128_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRe
x64Gen_writeU8(x64GenContext, 0xE4);
x64Gen_writeU32(x64GenContext, memImmU32);
}
- else if( memRegister == REG_NONE )
+ else if( memRegister == X86_REG_NONE )
{
assert_dbg();
//x64Gen_writeU8(x64GenContext, 0x66);
@@ -106,7 +104,7 @@ void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRe
{
// SSE3
// move one double from memory into lower and upper half of a xmm register
- if( memRegister == REG_RSP )
+ if( memRegister == X86_REG_RSP )
{
// MOVDDUP , [+]
// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
@@ -119,7 +117,7 @@ void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRe
x64Gen_writeU8(x64GenContext, 0xE4);
x64Gen_writeU32(x64GenContext, memImmU32);
}
- else if( memRegister == REG_R15 )
+ else if( memRegister == X86_REG_R15 )
{
// MOVDDUP , [+]
// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
@@ -131,7 +129,7 @@ void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRe
x64Gen_writeU8(x64GenContext, 0x87+(xmmRegister&7)*8);
x64Gen_writeU32(x64GenContext, memImmU32);
}
- else if( memRegister == REG_NONE )
+ else if( memRegister == X86_REG_NONE )
{
// MOVDDUP , []
// 36 F2 0F 12 05 - 00 00 00 00
@@ -185,7 +183,7 @@ void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi
{
// SSE2
// move lower 64bits (double) of xmm register to memory location
- if( memRegister == REG_NONE )
+ if( memRegister == X86_REG_NONE )
{
// MOVSD [],
// F2 0F 11 05 - 45 23 01 00
@@ -197,7 +195,7 @@ void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi
//x64Gen_writeU8(x64GenContext, 0x05+xmmRegister*8);
//x64Gen_writeU32(x64GenContext, memImmU32);
}
- else if( memRegister == REG_RSP )
+ else if( memRegister == X86_REG_RSP )
{
// MOVSD [RSP+],
// F2 0F 11 84 24 - 33 22 11 00
@@ -215,11 +213,42 @@ void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi
}
}
+void x64Gen_movsd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
+{
+    // SSE2: MOVSD xmm, [memRegister + memImmU32] (F2 0F 10 /r); only RSP and R15 bases are implemented
+    if( memRegister == X86_REG_RSP )
+    {
+        // MOVSD xmm, [RSP+imm32]
+        x64Gen_writeU8(x64GenContext, 0xF2);
+        x64Gen_genSSEVEXPrefix2(x64GenContext, 0, xmmRegister, false);
+        x64Gen_writeU8(x64GenContext, 0x0F);
+        x64Gen_writeU8(x64GenContext, 0x10);
+        x64Gen_writeU8(x64GenContext, 0x84+(xmmRegister&7)*8); // ModRM: mod = 10b (disp32), reg = xmm, rm = 100b (SIB follows)
+        x64Gen_writeU8(x64GenContext, 0x24); // SIB: base = RSP, no index
+        x64Gen_writeU32(x64GenContext, memImmU32);
+    }
+    else if( memRegister == X86_REG_R15 )
+    {
+        // MOVSD xmm, [R15+imm32]
+        x64Gen_writeU8(x64GenContext, 0x36); // NOTE(review): 0x36 prefix byte, same lead-in as the movddup emitter's byte comment -- confirm it is intended
+        x64Gen_writeU8(x64GenContext, 0xF2);
+        x64Gen_genSSEVEXPrefix2(x64GenContext, memRegister, xmmRegister, false);
+        x64Gen_writeU8(x64GenContext, 0x0F);
+        x64Gen_writeU8(x64GenContext, 0x10);
+        x64Gen_writeU8(x64GenContext, 0x87+(xmmRegister&7)*8); // ModRM: mod = 10b (disp32), reg = xmm, rm = 111b (R15 once the prefix extends it)
+        x64Gen_writeU32(x64GenContext, memImmU32);
+    }
+    else
+    {
+        assert_dbg(); // any other base register is not supported
+    }
+}
+
void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
// SSE3
// move one double from memory into lower half of a xmm register, leave upper half unchanged(?)
- if( memRegister == REG_NONE )
+ if( memRegister == X86_REG_NONE )
{
// MOVLPD , []
//x64Gen_writeU8(x64GenContext, 0x66);
@@ -229,7 +258,7 @@ void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmReg
//x64Gen_writeU32(x64GenContext, memImmU32);
assert_dbg();
}
- else if( memRegister == REG_RSP )
+ else if( memRegister == X86_REG_RSP )
{
// MOVLPD , [+]
// 66 0F 12 84 24 - 33 22 11 00
@@ -348,11 +377,11 @@ void x64Gen_mulpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegist
void x64Gen_mulpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
// SSE2
- if (memRegister == REG_NONE)
+ if (memRegister == X86_REG_NONE)
{
assert_dbg();
}
- else if (memRegister == REG_R14)
+ else if (memRegister == X86_REG_R14)
{
x64Gen_writeU8(x64GenContext, 0x66);
x64Gen_writeU8(x64GenContext, (xmmRegister < 8) ? 0x41 : 0x45);
@@ -404,7 +433,7 @@ void x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmR
{
// SSE2
// compare bottom double with double from memory location
- if( memoryReg == REG_R15 )
+ if( memoryReg == X86_REG_R15 )
{
x64Gen_writeU8(x64GenContext, 0x66);
x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true);
@@ -432,7 +461,7 @@ void x64Gen_comiss_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmR
{
// SSE2
// compare bottom float with float from memory location
- if (memoryReg == REG_R15)
+ if (memoryReg == X86_REG_R15)
{
x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true);
x64Gen_writeU8(x64GenContext, 0x0F);
@@ -448,7 +477,7 @@ void x64Gen_orps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRe
{
// SSE2
// and xmm register with 128 bit value from memory
- if( memReg == REG_R15 )
+ if( memReg == X86_REG_R15 )
{
x64Gen_genSSEVEXPrefix2(x64GenContext, memReg, xmmRegisterDest, false);
x64Gen_writeU8(x64GenContext, 0x0F);
@@ -464,7 +493,7 @@ void x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmR
{
// SSE2
// xor xmm register with 128 bit value from memory
- if( memReg == REG_R15 )
+ if( memReg == X86_REG_R15 )
{
x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); // todo: should be x64Gen_genSSEVEXPrefix2() with memReg?
x64Gen_writeU8(x64GenContext, 0x0F);
@@ -479,11 +508,11 @@ void x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmR
void x64Gen_andpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
// SSE2
- if (memRegister == REG_NONE)
+ if (memRegister == X86_REG_NONE)
{
assert_dbg();
}
- else if (memRegister == REG_R14)
+ else if (memRegister == X86_REG_R14)
{
x64Gen_writeU8(x64GenContext, 0x66);
x64Gen_writeU8(x64GenContext, (xmmRegister < 8) ? 0x41 : 0x45);
@@ -502,7 +531,7 @@ void x64Gen_andps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmR
{
// SSE2
// and xmm register with 128 bit value from memory
- if( memReg == REG_R15 )
+ if( memReg == X86_REG_R15 )
{
x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); // todo: should be x64Gen_genSSEVEXPrefix2() with memReg?
x64Gen_writeU8(x64GenContext, 0x0F);
@@ -528,7 +557,7 @@ void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xm
{
// SSE2
// doubleword integer compare
- if( memReg == REG_R15 )
+ if( memReg == X86_REG_R15 )
{
x64Gen_writeU8(x64GenContext, 0x66);
x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true);
@@ -563,6 +592,16 @@ void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 regis
x64Gen_writeU8(x64GenContext, 0xC0+(registerDest&7)*8+(xmmRegisterSrc&7));
}
+void x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc)
+{
+ // SSE2: CVTSI2SD xmmRegisterDest, registerSrc (F2 0F 2A /r); despite the _xmmReg_xmmReg suffix the source is an integer register
+ x64Gen_writeU8(x64GenContext, 0xF2);
+ x64Gen_genSSEVEXPrefix2(x64GenContext, registerSrc, xmmRegisterDest, false); // presumably emits the REX prefix for register numbers >= 8 -- helper body not visible here
+ x64Gen_writeU8(x64GenContext, 0x0F);
+ x64Gen_writeU8(x64GenContext, 0x2A);
+ x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(registerSrc&7)); // ModRM: mod = 11b, reg = destination xmm, rm = source register
+}
+
void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
// SSE2
@@ -610,7 +649,7 @@ void x64Gen_cvtpi2pd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xm
{
// SSE2
// converts two signed 32bit integers to two doubles
- if( memReg == REG_RSP )
+ if( memReg == X86_REG_RSP )
{
x64Gen_writeU8(x64GenContext, 0x66);
x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, false);
@@ -684,7 +723,7 @@ void x64Gen_rcpss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegist
void x64Gen_mulss_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
// SSE2
- if( memRegister == REG_NONE )
+ if( memRegister == X86_REG_NONE )
{
assert_dbg();
}
diff --git a/src/Cafe/HW/Espresso/Recompiler/x64Emit.hpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp
similarity index 99%
rename from src/Cafe/HW/Espresso/Recompiler/x64Emit.hpp
rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp
index e936f1d8..b4021931 100644
--- a/src/Cafe/HW/Espresso/Recompiler/x64Emit.hpp
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp
@@ -203,7 +203,6 @@ template
void _x64Gen_writeMODRM_internal(x64GenContext_t* x64GenContext, TA opA, TB opB)
{
static_assert(TA::getType() == MODRM_OPR_TYPE::REG);
- x64Gen_checkBuffer(x64GenContext);
// REX prefix
// 0100 WRXB
if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::REG)
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h
new file mode 100644
index 00000000..eae3835d
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h
@@ -0,0 +1,4335 @@
+#pragma once
+
+// x86-64 assembler/emitter
+// auto generated. Do not edit this file manually
+
+typedef unsigned long long u64;
+typedef unsigned int u32;
+typedef unsigned short u16;
+typedef unsigned char u8;
+typedef signed long long s64;
+typedef signed int s32;
+typedef signed short s16;
+typedef signed char s8;
+
+enum X86Reg : sint8 // register numbers as encoded by the emitter: low 3 bits go into ModRM/SIB, bit 3 into the REX prefix
+{
+ X86_REG_NONE = -1, // sentinel: no register (e.g. an absent index operand)
+ X86_REG_EAX = 0,
+ X86_REG_ECX = 1,
+ X86_REG_EDX = 2,
+ X86_REG_EBX = 3,
+ X86_REG_ESP = 4,
+ X86_REG_EBP = 5,
+ X86_REG_ESI = 6,
+ X86_REG_EDI = 7,
+ X86_REG_R8D = 8,
+ X86_REG_R9D = 9,
+ X86_REG_R10D = 10,
+ X86_REG_R11D = 11,
+ X86_REG_R12D = 12,
+ X86_REG_R13D = 13,
+ X86_REG_R14D = 14,
+ X86_REG_R15D = 15,
+ X86_REG_RAX = 0, // the 64-bit names below alias the same numbers as the 32-bit names above
+ X86_REG_RCX = 1,
+ X86_REG_RDX = 2,
+ X86_REG_RBX = 3,
+ X86_REG_RSP = 4,
+ X86_REG_RBP = 5,
+ X86_REG_RSI = 6,
+ X86_REG_RDI = 7,
+ X86_REG_R8 = 8,
+ X86_REG_R9 = 9,
+ X86_REG_R10 = 10,
+ X86_REG_R11 = 11,
+ X86_REG_R12 = 12,
+ X86_REG_R13 = 13,
+ X86_REG_R14 = 14,
+ X86_REG_R15 = 15
+};
+
+enum X86Cond : u8 // NOTE(review): values 0..15 look like the 4-bit x86 condition-code field (Jcc/SETcc/CMOVcc) -- confirm against the users of this enum
+{
+ X86_CONDITION_O = 0,
+ X86_CONDITION_NO = 1,
+ X86_CONDITION_B = 2,
+ X86_CONDITION_NB = 3,
+ X86_CONDITION_Z = 4,
+ X86_CONDITION_NZ = 5,
+ X86_CONDITION_BE = 6,
+ X86_CONDITION_NBE = 7,
+ X86_CONDITION_S = 8,
+ X86_CONDITION_NS = 9,
+ X86_CONDITION_PE = 10,
+ X86_CONDITION_PO = 11,
+ X86_CONDITION_L = 12,
+ X86_CONDITION_NL = 13,
+ X86_CONDITION_LE = 14,
+ X86_CONDITION_NLE = 15
+};
+class x86Assembler64
+{
+private:
+ std::vector<u8> m_buffer; // emitted machine code bytes, in emission order
+
+public:
+ u8* GetBufferPtr() { return m_buffer.data(); }; // pointer is invalidated by any later emit that grows the vector
+ std::span<u8> GetBuffer() { return m_buffer; }; // view over everything emitted so far
+ u32 GetWriteIndex() { return (u32)m_buffer.size(); }; // byte offset at which the next emit will land
+ void _emitU8(u8 v) { m_buffer.emplace_back(v); };
+ void _emitU16(u16 v) { size_t writeIdx = m_buffer.size(); m_buffer.resize(writeIdx + 2); *(u16*)(m_buffer.data() + writeIdx) = v; }; // NOTE(review): stored in host byte order -- assumes a little-endian host
+ void _emitU32(u32 v) { size_t writeIdx = m_buffer.size(); m_buffer.resize(writeIdx + 4); *(u32*)(m_buffer.data() + writeIdx) = v; }; // same host-byte-order store, 4 bytes
+ void _emitU64(u64 v) { size_t writeIdx = m_buffer.size(); m_buffer.resize(writeIdx + 8); *(u64*)(m_buffer.data() + writeIdx) = v; }; // same host-byte-order store, 8 bytes
+ using GPR64 = X86Reg;
+ using GPR32 = X86Reg;
+ using GPR8_REX = X86Reg; // byte registers; the *_bb emitters add a REX prefix for register numbers >= 4
+ void LockPrefix() { _emitU8(0xF0); }; // emits the 0xF0 (LOCK) prefix byte ahead of the next instruction
+ void ADD_bb(GPR8_REX dst, GPR8_REX src)
+ {
+     // ADD dst, src on byte registers (opcode 0x00: ADD r/m8, r8)
+     const bool needsRex = !((src < 4) && (dst < 4)); // a REX prefix is emitted as soon as either register number is >= 4
+     if (needsRex)
+         _emitU8(0x40 | ((src & 8) >> 1) | ((dst & 8) >> 3)); // REX.R extends src, REX.B extends dst
+     _emitU8(0x00);
+     _emitU8(0xC0 | ((src & 7) << 3) | (dst & 7)); // ModRM: mod = 11b (register direct), reg = src, rm = dst
+ }
+ void ADD_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
+ {
+     // ADD byte [memReg + index + offset], src (opcode 0x00: ADD r/m8, r8)
+     uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // base 101b cannot be encoded with mod 0, so it always gets a displacement
+     else if (offset == (s32)(s8)offset) mod = 1;
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     if ((memReg & 7) == 4)
+     {
+         // base 100b (RSP/R12) is only addressable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         index = X86_REG_RSP; // index 100b with REX.X clear = no index; copying memReg would set REX.X for R12 and encode [r12+r12]
+         sib_use = true;
+     }
+     if (sib_use)
+     {
+         if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+             _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); // REX.R = src, REX.B = base, REX.X = index
+     }
+     else
+     {
+         if ((src >= 4) || (memReg & 8))
+             _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3)); // the base belongs in REX.B (bit 0), not REX.R
+     }
+     _emitU8(0x00);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // SIB scale bits stay 0 (x1); scaler only enables the index
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADD_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     // ADD dst, byte [memReg + index + offset] (opcode 0x02: ADD r8, r/m8)
+     uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // base 101b cannot be encoded with mod 0, so it always gets a displacement
+     else if (offset == (s32)(s8)offset) mod = 1;
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     if ((memReg & 7) == 4)
+     {
+         // base 100b (RSP/R12) is only addressable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         index = X86_REG_RSP; // index 100b with REX.X clear = no index; copying memReg would set REX.X for R12 and encode [r12+r12]
+         sib_use = true;
+     }
+     if (sib_use)
+     {
+         if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+             _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); // REX.R = dst, REX.B = base, REX.X = index
+     }
+     else
+     {
+         if ((dst >= 4) || (memReg & 8))
+             _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3)); // the base belongs in REX.B (bit 0), not REX.R
+     }
+     _emitU8(0x02);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // SIB scale bits stay 0 (x1); scaler only enables the index
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADD_dd(GPR32 dst, GPR32 src)
+ {
+     // ADD dst, src on 32-bit registers (opcode 0x01: ADD r/m32, r32)
+     const u8 rexBits = ((dst & 8) >> 3) | ((src & 8) >> 1); // REX.B extends dst, REX.R extends src
+     if (rexBits != 0)
+         _emitU8(0x40 | rexBits); // prefix only needed when a register number is >= 8
+     _emitU8(0x01);
+     _emitU8(0xC0 | ((src & 7) << 3) | (dst & 7)); // ModRM: mod = 11b (register direct), reg = src, rm = dst
+ }
+ void ADD_qq(GPR64 dst, GPR64 src)
+ {
+     _emitU8(0x48 | ((src & 8) >> 1) | ((dst & 8) >> 3)); // REX with W always set (64-bit operands); R extends src, B extends dst
+     _emitU8(0x01); // ADD r/m64, r64
+     _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3)); // ModRM: mod = 11b (register direct), reg = src, rm = dst
+ }
+ void ADD_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
+ {
+     // ADD dword [memReg + index + offset], src (opcode 0x01: ADD r/m32, r32)
+     uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // base 101b cannot be encoded with mod 0, so it always gets a displacement
+     else if (offset == (s32)(s8)offset) mod = 1;
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     if ((memReg & 7) == 4)
+     {
+         // base 100b (RSP/R12) is only addressable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         index = X86_REG_RSP; // index 100b with REX.X clear = no index; copying memReg would set REX.X for R12 and encode [r12+r12]
+         sib_use = true;
+     }
+     if (sib_use)
+     {
+         if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+             _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); // REX.R = src, REX.B = base, REX.X = index
+     }
+     else
+     {
+         if ((src & 8) || (memReg & 8))
+             _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3)); // the base belongs in REX.B (bit 0), not REX.R
+     }
+     _emitU8(0x01);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // SIB scale bits stay 0 (x1); scaler only enables the index
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADD_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
+ {
+     // ADD qword [memReg + index + offset], src (REX.W + opcode 0x01: ADD r/m64, r64)
+     uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // base 101b cannot be encoded with mod 0, so it always gets a displacement
+     else if (offset == (s32)(s8)offset) mod = 1;
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     if ((memReg & 7) == 4)
+     {
+         // base 100b (RSP/R12) is only addressable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         index = X86_REG_RSP; // index 100b with REX.X clear = no index; copying memReg would set REX.X for R12 and encode [r12+r12]
+         sib_use = true;
+     }
+     if (sib_use)
+     {
+         _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); // REX.W | R = src | B = base | X = index
+     }
+     else
+     {
+         _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | 0x08); // the base belongs in REX.B (bit 0), not REX.R
+     }
+     _emitU8(0x01);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // SIB scale bits stay 0 (x1); scaler only enables the index
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADD_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     // ADD dst, dword [memReg + index + offset] (opcode 0x03: ADD r32, r/m32)
+     uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // base 101b cannot be encoded with mod 0, so it always gets a displacement
+     else if (offset == (s32)(s8)offset) mod = 1;
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     if ((memReg & 7) == 4)
+     {
+         // base 100b (RSP/R12) is only addressable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         index = X86_REG_RSP; // index 100b with REX.X clear = no index; copying memReg would set REX.X for R12 and encode [r12+r12]
+         sib_use = true;
+     }
+     if (sib_use)
+     {
+         if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+             _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); // REX.R = dst, REX.B = base, REX.X = index
+     }
+     else
+     {
+         if ((dst & 8) || (memReg & 8))
+             _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3)); // the base belongs in REX.B (bit 0), not REX.R
+     }
+     _emitU8(0x03);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // SIB scale bits stay 0 (x1); scaler only enables the index
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADD_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     // ADD dst, qword [memReg + index + offset] (REX.W + opcode 0x03: ADD r64, r/m64)
+     uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // base 101b cannot be encoded with mod 0, so it always gets a displacement
+     else if (offset == (s32)(s8)offset) mod = 1;
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     if ((memReg & 7) == 4)
+     {
+         // base 100b (RSP/R12) is only addressable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         index = X86_REG_RSP; // index 100b with REX.X clear = no index; copying memReg would set REX.X for R12 and encode [r12+r12]
+         sib_use = true;
+     }
+     if (sib_use)
+     {
+         _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); // REX.W | R = dst | B = base | X = index
+     }
+     else
+     {
+         _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | 0x08); // the base belongs in REX.B (bit 0), not REX.R
+     }
+     _emitU8(0x03);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // SIB scale bits stay 0 (x1); scaler only enables the index
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ void OR_bb(GPR8_REX dst, GPR8_REX src)
+ {
+     // OR dst, src on byte registers (opcode 0x08: OR r/m8, r8)
+     const bool needsRex = !((src < 4) && (dst < 4)); // a REX prefix is emitted as soon as either register number is >= 4
+     if (needsRex)
+         _emitU8(0x40 | ((src & 8) >> 1) | ((dst & 8) >> 3)); // REX.R extends src, REX.B extends dst
+     _emitU8(0x08);
+     _emitU8(0xC0 | ((src & 7) << 3) | (dst & 7)); // ModRM: mod = 11b (register direct), reg = src, rm = dst
+ }
+ void OR_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
+ {
+     // OR byte [memReg + index + offset], src (opcode 0x08: OR r/m8, r8)
+     uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // base 101b cannot be encoded with mod 0, so it always gets a displacement
+     else if (offset == (s32)(s8)offset) mod = 1;
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     if ((memReg & 7) == 4)
+     {
+         // base 100b (RSP/R12) is only addressable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         index = X86_REG_RSP; // index 100b with REX.X clear = no index; copying memReg would set REX.X for R12 and encode [r12+r12]
+         sib_use = true;
+     }
+     if (sib_use)
+     {
+         if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+             _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); // REX.R = src, REX.B = base, REX.X = index
+     }
+     else
+     {
+         if ((src >= 4) || (memReg & 8))
+             _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3)); // the base belongs in REX.B (bit 0), not REX.R
+     }
+     _emitU8(0x08);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // SIB scale bits stay 0 (x1); scaler only enables the index
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ void OR_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     // OR dst, byte [memReg + index + offset] (opcode 0x0A: OR r8, r/m8)
+     uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // base 101b cannot be encoded with mod 0, so it always gets a displacement
+     else if (offset == (s32)(s8)offset) mod = 1;
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     if ((memReg & 7) == 4)
+     {
+         // base 100b (RSP/R12) is only addressable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         index = X86_REG_RSP; // index 100b with REX.X clear = no index; copying memReg would set REX.X for R12 and encode [r12+r12]
+         sib_use = true;
+     }
+     if (sib_use)
+     {
+         if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+             _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); // REX.R = dst, REX.B = base, REX.X = index
+     }
+     else
+     {
+         if ((dst >= 4) || (memReg & 8))
+             _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3)); // the base belongs in REX.B (bit 0), not REX.R
+     }
+     _emitU8(0x0a);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // SIB scale bits stay 0 (x1); scaler only enables the index
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ void OR_dd(GPR32 dst, GPR32 src)
+ {
+     // OR dst, src on 32-bit registers (opcode 0x09: OR r/m32, r32)
+     const u8 rexBits = ((dst & 8) >> 3) | ((src & 8) >> 1); // REX.B extends dst, REX.R extends src
+     if (rexBits != 0)
+         _emitU8(0x40 | rexBits); // prefix only needed when a register number is >= 8
+     _emitU8(0x09);
+     _emitU8(0xC0 | ((src & 7) << 3) | (dst & 7)); // ModRM: mod = 11b (register direct), reg = src, rm = dst
+ }
+ void OR_qq(GPR64 dst, GPR64 src)
+ {
+     _emitU8(0x48 | ((src & 8) >> 1) | ((dst & 8) >> 3)); // REX with W always set (64-bit operands); R extends src, B extends dst
+     _emitU8(0x09); // OR r/m64, r64
+     _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3)); // ModRM: mod = 11b (register direct), reg = src, rm = dst
+ }
+ void OR_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
+ {
+     // OR dword [memReg + index + offset], src (opcode 0x09: OR r/m32, r32)
+     uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // base 101b cannot be encoded with mod 0, so it always gets a displacement
+     else if (offset == (s32)(s8)offset) mod = 1;
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     if ((memReg & 7) == 4)
+     {
+         // base 100b (RSP/R12) is only addressable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         index = X86_REG_RSP; // index 100b with REX.X clear = no index; copying memReg would set REX.X for R12 and encode [r12+r12]
+         sib_use = true;
+     }
+     if (sib_use)
+     {
+         if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+             _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); // REX.R = src, REX.B = base, REX.X = index
+     }
+     else
+     {
+         if ((src & 8) || (memReg & 8))
+             _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3)); // the base belongs in REX.B (bit 0), not REX.R
+     }
+     _emitU8(0x09);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // SIB scale bits stay 0 (x1); scaler only enables the index
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ void OR_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
+ {
+     // OR qword [memReg + index + offset], src (REX.W + opcode 0x09: OR r/m64, r64)
+     uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // base 101b cannot be encoded with mod 0, so it always gets a displacement
+     else if (offset == (s32)(s8)offset) mod = 1;
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     if ((memReg & 7) == 4)
+     {
+         // base 100b (RSP/R12) is only addressable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         index = X86_REG_RSP; // index 100b with REX.X clear = no index; copying memReg would set REX.X for R12 and encode [r12+r12]
+         sib_use = true;
+     }
+     if (sib_use)
+     {
+         _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); // REX.W | R = src | B = base | X = index
+     }
+     else
+     {
+         _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | 0x08); // the base belongs in REX.B (bit 0), not REX.R
+     }
+     _emitU8(0x09);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // SIB scale bits stay 0 (x1); scaler only enables the index
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ void OR_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     // OR dst, dword [memReg + index + offset] (opcode 0x0B: OR r32, r/m32)
+     uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // base 101b cannot be encoded with mod 0, so it always gets a displacement
+     else if (offset == (s32)(s8)offset) mod = 1;
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     if ((memReg & 7) == 4)
+     {
+         // base 100b (RSP/R12) is only addressable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         index = X86_REG_RSP; // index 100b with REX.X clear = no index; copying memReg would set REX.X for R12 and encode [r12+r12]
+         sib_use = true;
+     }
+     if (sib_use)
+     {
+         if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+             _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); // REX.R = dst, REX.B = base, REX.X = index
+     }
+     else
+     {
+         if ((dst & 8) || (memReg & 8))
+             _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3)); // the base belongs in REX.B (bit 0), not REX.R
+     }
+     _emitU8(0x0b);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // SIB scale bits stay 0 (x1); scaler only enables the index
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ void OR_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     // OR dst, qword [memReg + index + offset] (REX.W + opcode 0x0B: OR r64, r/m64)
+     uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // base 101b cannot be encoded with mod 0, so it always gets a displacement
+     else if (offset == (s32)(s8)offset) mod = 1;
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     if ((memReg & 7) == 4)
+     {
+         // base 100b (RSP/R12) is only addressable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         index = X86_REG_RSP; // index 100b with REX.X clear = no index; copying memReg would set REX.X for R12 and encode [r12+r12]
+         sib_use = true;
+     }
+     if (sib_use)
+     {
+         _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); // REX.W | R = dst | B = base | X = index
+     }
+     else
+     {
+         _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | 0x08); // the base belongs in REX.B (bit 0), not REX.R
+     }
+     _emitU8(0x0b);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // SIB scale bits stay 0 (x1); scaler only enables the index
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADC_bb(GPR8_REX dst, GPR8_REX src)
+ {
+     // ADC dst, src on byte registers (opcode 0x10: ADC r/m8, r8)
+     const bool needsRex = !((src < 4) && (dst < 4)); // a REX prefix is emitted as soon as either register number is >= 4
+     if (needsRex)
+         _emitU8(0x40 | ((src & 8) >> 1) | ((dst & 8) >> 3)); // REX.R extends src, REX.B extends dst
+     _emitU8(0x10);
+     _emitU8(0xC0 | ((src & 7) << 3) | (dst & 7)); // ModRM: mod = 11b (register direct), reg = src, rm = dst
+ }
+ void ADC_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
+ {
+     // ADC byte [memReg + index + offset], src (opcode 0x10: ADC r/m8, r8)
+     uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // base 101b cannot be encoded with mod 0, so it always gets a displacement
+     else if (offset == (s32)(s8)offset) mod = 1;
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     if ((memReg & 7) == 4)
+     {
+         // base 100b (RSP/R12) is only addressable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         index = X86_REG_RSP; // index 100b with REX.X clear = no index; copying memReg would set REX.X for R12 and encode [r12+r12]
+         sib_use = true;
+     }
+     if (sib_use)
+     {
+         if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+             _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); // REX.R = src, REX.B = base, REX.X = index
+     }
+     else
+     {
+         if ((src >= 4) || (memReg & 8))
+             _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3)); // the base belongs in REX.B (bit 0), not REX.R
+     }
+     _emitU8(0x10);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // SIB scale bits stay 0 (x1); scaler only enables the index
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADC_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     // ADC dst, byte [memReg + index + offset] (opcode 0x12: ADC r8, r/m8)
+     uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // base 101b cannot be encoded with mod 0, so it always gets a displacement
+     else if (offset == (s32)(s8)offset) mod = 1;
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     if ((memReg & 7) == 4)
+     {
+         // base 100b (RSP/R12) is only addressable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         index = X86_REG_RSP; // index 100b with REX.X clear = no index; copying memReg would set REX.X for R12 and encode [r12+r12]
+         sib_use = true;
+     }
+     if (sib_use)
+     {
+         if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+             _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); // REX.R = dst, REX.B = base, REX.X = index
+     }
+     else
+     {
+         if ((dst >= 4) || (memReg & 8))
+             _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3)); // the base belongs in REX.B (bit 0), not REX.R
+     }
+     _emitU8(0x12);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // SIB scale bits stay 0 (x1); scaler only enables the index
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADC_dd(GPR32 dst, GPR32 src)
+ {
+     // ADC dst, src on 32-bit registers (opcode 0x11: ADC r/m32, r32)
+     const u8 rexBits = ((dst & 8) >> 3) | ((src & 8) >> 1); // REX.B extends dst, REX.R extends src
+     if (rexBits != 0)
+         _emitU8(0x40 | rexBits); // prefix only needed when a register number is >= 8
+     _emitU8(0x11);
+     _emitU8(0xC0 | ((src & 7) << 3) | (dst & 7)); // ModRM: mod = 11b (register direct), reg = src, rm = dst
+ }
+ void ADC_qq(GPR64 dst, GPR64 src)
+ {
+     _emitU8(0x48 | ((src & 8) >> 1) | ((dst & 8) >> 3)); // REX with W always set (64-bit operands); R extends src, B extends dst
+     _emitU8(0x11); // ADC r/m64, r64
+     _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3)); // ModRM: mod = 11b (register direct), reg = src, rm = dst
+ }
+ void ADC_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
+ {
+     // ADC dword [memReg + index + offset], src (opcode 0x11: ADC r/m32, r32)
+     uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // base 101b cannot be encoded with mod 0, so it always gets a displacement
+     else if (offset == (s32)(s8)offset) mod = 1;
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     if ((memReg & 7) == 4)
+     {
+         // base 100b (RSP/R12) is only addressable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         index = X86_REG_RSP; // index 100b with REX.X clear = no index; copying memReg would set REX.X for R12 and encode [r12+r12]
+         sib_use = true;
+     }
+     if (sib_use)
+     {
+         if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+             _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); // REX.R = src, REX.B = base, REX.X = index
+     }
+     else
+     {
+         if ((src & 8) || (memReg & 8))
+             _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3)); // the base belongs in REX.B (bit 0), not REX.R
+     }
+     _emitU8(0x11);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // SIB scale bits stay 0 (x1); scaler only enables the index
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADC_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
+ {
+     // ADC qword [memReg + index + offset], src (REX.W + opcode 0x11: ADC r/m64, r64)
+     uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // base 101b cannot be encoded with mod 0, so it always gets a displacement
+     else if (offset == (s32)(s8)offset) mod = 1;
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     if ((memReg & 7) == 4)
+     {
+         // base 100b (RSP/R12) is only addressable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         index = X86_REG_RSP; // index 100b with REX.X clear = no index; copying memReg would set REX.X for R12 and encode [r12+r12]
+         sib_use = true;
+     }
+     if (sib_use)
+     {
+         _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); // REX.W | R = src | B = base | X = index
+     }
+     else
+     {
+         _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | 0x08); // the base belongs in REX.B (bit 0), not REX.R
+     }
+     _emitU8(0x11);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // SIB scale bits stay 0 (x1); scaler only enables the index
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // ADC r32, r/m32 (opcode 0x13): register dst on the left, memory operand [memReg + index + offset] on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void ADC_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((dst & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if (rex != 0x40) // a REX prefix is only needed when some register is R8-R15
+         _emitU8(rex);
+     _emitU8(0x13);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // ADC r64, r/m64 (REX.W + opcode 0x13): register dst on the left, memory operand [memReg + index + offset] on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void ADC_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.W (0x08) selects 64-bit operand size; REX.R extends dst, REX.X the index, REX.B the base
+     _emitU8((uint8)(0x48 | ((dst & 8) >> 1) | rex_x | ((memReg & 8) >> 3)));
+     _emitU8(0x13);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // SBB r/m8, r8 (opcode 0x18), register-direct form: dst goes into ModRM.rm, src into ModRM.reg.
+ void SBB_bb(GPR8_REX dst, GPR8_REX src)
+ {
+     // Byte registers 4 and above always get a REX prefix (without one, 4-7 would select AH/CH/DH/BH)
+     const bool needs_rex = (dst >= 4) || (src >= 4);
+     if (needs_rex)
+         _emitU8(0x40 | ((src & 8) >> 1) | ((dst & 8) >> 3));
+     _emitU8(0x18);
+     // mod=11b: both operands are registers
+     _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3));
+ }
+ // SBB r/m8, r8 (opcode 0x18): memory operand [memReg + index + offset] on the left, byte register src on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void SBB_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((src & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if ((src >= 4) || rex != 0x40) // byte registers 4+ always take a REX prefix, even an otherwise empty one
+         _emitU8(rex);
+     _emitU8(0x18);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // SBB r8, r/m8 (opcode 0x1A): byte register dst on the left, memory operand [memReg + index + offset] on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void SBB_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((dst & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if ((dst >= 4) || rex != 0x40) // byte registers 4+ always take a REX prefix, even an otherwise empty one
+         _emitU8(rex);
+     _emitU8(0x1a);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // SBB r/m32, r32 (opcode 0x19), register-direct form: dst goes into ModRM.rm, src into ModRM.reg.
+ void SBB_dd(GPR32 dst, GPR32 src)
+ {
+     // REX.B (bit 0) carries bit 3 of dst, REX.R (bit 2) carries bit 3 of src
+     const int rex_ext = ((src & 8) >> 1) | ((dst & 8) >> 3);
+     if (rex_ext != 0) // prefix is only required when R8D-R15D is involved
+         _emitU8(0x40 | rex_ext);
+     _emitU8(0x19);
+     // mod=11b: both operands are registers
+     _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3));
+ }
+ // SBB r/m64, r64 (REX.W + opcode 0x19), register-direct form: dst goes into ModRM.rm, src into ModRM.reg.
+ void SBB_qq(GPR64 dst, GPR64 src)
+ {
+     // REX.W (0x08) is always present for 64-bit operand size; REX.R / REX.B carry bit 3 of src / dst
+     const int rex_r = (src & 8) >> 1;
+     const int rex_b = (dst & 8) >> 3;
+     _emitU8(0x48 | rex_r | rex_b);
+     _emitU8(0x19);
+     // mod=11b: both operands are registers
+     _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3));
+ }
+ // SBB r/m32, r32 (opcode 0x19): memory operand [memReg + index + offset] on the left, register src on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void SBB_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((src & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if (rex != 0x40) // a REX prefix is only needed when some register is R8-R15
+         _emitU8(rex);
+     _emitU8(0x19);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // SBB r/m64, r64 (REX.W + opcode 0x19): memory operand [memReg + index + offset] on the left, register src on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void SBB_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.W (0x08) selects 64-bit operand size; REX.R extends src, REX.X the index, REX.B the base
+     _emitU8((uint8)(0x48 | ((src & 8) >> 1) | rex_x | ((memReg & 8) >> 3)));
+     _emitU8(0x19);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // SBB r32, r/m32 (opcode 0x1B): register dst on the left, memory operand [memReg + index + offset] on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void SBB_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((dst & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if (rex != 0x40) // a REX prefix is only needed when some register is R8-R15
+         _emitU8(rex);
+     _emitU8(0x1b);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // SBB r64, r/m64 (REX.W + opcode 0x1B): register dst on the left, memory operand [memReg + index + offset] on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void SBB_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.W (0x08) selects 64-bit operand size; REX.R extends dst, REX.X the index, REX.B the base
+     _emitU8((uint8)(0x48 | ((dst & 8) >> 1) | rex_x | ((memReg & 8) >> 3)));
+     _emitU8(0x1b);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // AND r/m8, r8 (opcode 0x20), register-direct form: dst goes into ModRM.rm, src into ModRM.reg.
+ void AND_bb(GPR8_REX dst, GPR8_REX src)
+ {
+     // Byte registers 4 and above always get a REX prefix (without one, 4-7 would select AH/CH/DH/BH)
+     const bool needs_rex = (dst >= 4) || (src >= 4);
+     if (needs_rex)
+         _emitU8(0x40 | ((src & 8) >> 1) | ((dst & 8) >> 3));
+     _emitU8(0x20);
+     // mod=11b: both operands are registers
+     _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3));
+ }
+ // AND r/m8, r8 (opcode 0x20): memory operand [memReg + index + offset] on the left, byte register src on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void AND_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((src & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if ((src >= 4) || rex != 0x40) // byte registers 4+ always take a REX prefix, even an otherwise empty one
+         _emitU8(rex);
+     _emitU8(0x20);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // AND r8, r/m8 (opcode 0x22): byte register dst on the left, memory operand [memReg + index + offset] on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void AND_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((dst & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if ((dst >= 4) || rex != 0x40) // byte registers 4+ always take a REX prefix, even an otherwise empty one
+         _emitU8(rex);
+     _emitU8(0x22);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // AND r/m32, r32 (opcode 0x21), register-direct form: dst goes into ModRM.rm, src into ModRM.reg.
+ void AND_dd(GPR32 dst, GPR32 src)
+ {
+     // REX.B (bit 0) carries bit 3 of dst, REX.R (bit 2) carries bit 3 of src
+     const int rex_ext = ((src & 8) >> 1) | ((dst & 8) >> 3);
+     if (rex_ext != 0) // prefix is only required when R8D-R15D is involved
+         _emitU8(0x40 | rex_ext);
+     _emitU8(0x21);
+     // mod=11b: both operands are registers
+     _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3));
+ }
+ // AND r/m64, r64 (REX.W + opcode 0x21), register-direct form: dst goes into ModRM.rm, src into ModRM.reg.
+ void AND_qq(GPR64 dst, GPR64 src)
+ {
+     // REX.W (0x08) is always present for 64-bit operand size; REX.R / REX.B carry bit 3 of src / dst
+     const int rex_r = (src & 8) >> 1;
+     const int rex_b = (dst & 8) >> 3;
+     _emitU8(0x48 | rex_r | rex_b);
+     _emitU8(0x21);
+     // mod=11b: both operands are registers
+     _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3));
+ }
+ // AND r/m32, r32 (opcode 0x21): memory operand [memReg + index + offset] on the left, register src on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void AND_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((src & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if (rex != 0x40) // a REX prefix is only needed when some register is R8-R15
+         _emitU8(rex);
+     _emitU8(0x21);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // AND r/m64, r64 (REX.W + opcode 0x21): memory operand [memReg + index + offset] on the left, register src on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void AND_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.W (0x08) selects 64-bit operand size; REX.R extends src, REX.X the index, REX.B the base
+     _emitU8((uint8)(0x48 | ((src & 8) >> 1) | rex_x | ((memReg & 8) >> 3)));
+     _emitU8(0x21);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // AND r32, r/m32 (opcode 0x23): register dst on the left, memory operand [memReg + index + offset] on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void AND_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((dst & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if (rex != 0x40) // a REX prefix is only needed when some register is R8-R15
+         _emitU8(rex);
+     _emitU8(0x23);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // AND r64, r/m64 (REX.W + opcode 0x23): register dst on the left, memory operand [memReg + index + offset] on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void AND_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.W (0x08) selects 64-bit operand size; REX.R extends dst, REX.X the index, REX.B the base
+     _emitU8((uint8)(0x48 | ((dst & 8) >> 1) | rex_x | ((memReg & 8) >> 3)));
+     _emitU8(0x23);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // SUB r/m8, r8 (opcode 0x28), register-direct form: dst goes into ModRM.rm, src into ModRM.reg.
+ void SUB_bb(GPR8_REX dst, GPR8_REX src)
+ {
+     // Byte registers 4 and above always get a REX prefix (without one, 4-7 would select AH/CH/DH/BH)
+     const bool needs_rex = (dst >= 4) || (src >= 4);
+     if (needs_rex)
+         _emitU8(0x40 | ((src & 8) >> 1) | ((dst & 8) >> 3));
+     _emitU8(0x28);
+     // mod=11b: both operands are registers
+     _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3));
+ }
+ // SUB r/m8, r8 (opcode 0x28): memory operand [memReg + index + offset] on the left, byte register src on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void SUB_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((src & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if ((src >= 4) || rex != 0x40) // byte registers 4+ always take a REX prefix, even an otherwise empty one
+         _emitU8(rex);
+     _emitU8(0x28);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // SUB r8, r/m8 (opcode 0x2A): byte register dst on the left, memory operand [memReg + index + offset] on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void SUB_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((dst & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if ((dst >= 4) || rex != 0x40) // byte registers 4+ always take a REX prefix, even an otherwise empty one
+         _emitU8(rex);
+     _emitU8(0x2a);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // SUB r/m32, r32 (opcode 0x29), register-direct form: dst goes into ModRM.rm, src into ModRM.reg.
+ void SUB_dd(GPR32 dst, GPR32 src)
+ {
+     // REX.B (bit 0) carries bit 3 of dst, REX.R (bit 2) carries bit 3 of src
+     const int rex_ext = ((src & 8) >> 1) | ((dst & 8) >> 3);
+     if (rex_ext != 0) // prefix is only required when R8D-R15D is involved
+         _emitU8(0x40 | rex_ext);
+     _emitU8(0x29);
+     // mod=11b: both operands are registers
+     _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3));
+ }
+ // SUB r/m64, r64 (REX.W + opcode 0x29), register-direct form: dst goes into ModRM.rm, src into ModRM.reg.
+ void SUB_qq(GPR64 dst, GPR64 src)
+ {
+     // REX.W (0x08) is always present for 64-bit operand size; REX.R / REX.B carry bit 3 of src / dst
+     const int rex_r = (src & 8) >> 1;
+     const int rex_b = (dst & 8) >> 3;
+     _emitU8(0x48 | rex_r | rex_b);
+     _emitU8(0x29);
+     // mod=11b: both operands are registers
+     _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3));
+ }
+ // SUB r/m32, r32 (opcode 0x29): memory operand [memReg + index + offset] on the left, register src on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void SUB_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((src & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if (rex != 0x40) // a REX prefix is only needed when some register is R8-R15
+         _emitU8(rex);
+     _emitU8(0x29);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // SUB r/m64, r64 (REX.W + opcode 0x29): memory operand [memReg + index + offset] on the left, register src on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void SUB_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.W (0x08) selects 64-bit operand size; REX.R extends src, REX.X the index, REX.B the base
+     _emitU8((uint8)(0x48 | ((src & 8) >> 1) | rex_x | ((memReg & 8) >> 3)));
+     _emitU8(0x29);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // SUB r32, r/m32 (opcode 0x2B): register dst on the left, memory operand [memReg + index + offset] on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void SUB_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((dst & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if (rex != 0x40) // a REX prefix is only needed when some register is R8-R15
+         _emitU8(rex);
+     _emitU8(0x2b);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // SUB r64, r/m64 (REX.W + opcode 0x2B): register dst on the left, memory operand [memReg + index + offset] on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void SUB_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.W (0x08) selects 64-bit operand size; REX.R extends dst, REX.X the index, REX.B the base
+     _emitU8((uint8)(0x48 | ((dst & 8) >> 1) | rex_x | ((memReg & 8) >> 3)));
+     _emitU8(0x2b);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // XOR r/m8, r8 (opcode 0x30), register-direct form: dst goes into ModRM.rm, src into ModRM.reg.
+ void XOR_bb(GPR8_REX dst, GPR8_REX src)
+ {
+     // Byte registers 4 and above always get a REX prefix (without one, 4-7 would select AH/CH/DH/BH)
+     const bool needs_rex = (dst >= 4) || (src >= 4);
+     if (needs_rex)
+         _emitU8(0x40 | ((src & 8) >> 1) | ((dst & 8) >> 3));
+     _emitU8(0x30);
+     // mod=11b: both operands are registers
+     _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3));
+ }
+ // XOR r/m8, r8 (opcode 0x30): memory operand [memReg + index + offset] on the left, byte register src on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void XOR_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((src & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if ((src >= 4) || rex != 0x40) // byte registers 4+ always take a REX prefix, even an otherwise empty one
+         _emitU8(rex);
+     _emitU8(0x30);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // XOR r8, r/m8 (opcode 0x32): byte register dst on the left, memory operand [memReg + index + offset] on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void XOR_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((dst & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if ((dst >= 4) || rex != 0x40) // byte registers 4+ always take a REX prefix, even an otherwise empty one
+         _emitU8(rex);
+     _emitU8(0x32);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // XOR r/m32, r32 (opcode 0x31), register-direct form: dst goes into ModRM.rm, src into ModRM.reg.
+ void XOR_dd(GPR32 dst, GPR32 src)
+ {
+     // REX.B (bit 0) carries bit 3 of dst, REX.R (bit 2) carries bit 3 of src
+     const int rex_ext = ((src & 8) >> 1) | ((dst & 8) >> 3);
+     if (rex_ext != 0) // prefix is only required when R8D-R15D is involved
+         _emitU8(0x40 | rex_ext);
+     _emitU8(0x31);
+     // mod=11b: both operands are registers
+     _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3));
+ }
+ // XOR r/m64, r64 (REX.W + opcode 0x31), register-direct form: dst goes into ModRM.rm, src into ModRM.reg.
+ void XOR_qq(GPR64 dst, GPR64 src)
+ {
+     // REX.W (0x08) is always present for 64-bit operand size; REX.R / REX.B carry bit 3 of src / dst
+     const int rex_r = (src & 8) >> 1;
+     const int rex_b = (dst & 8) >> 3;
+     _emitU8(0x48 | rex_r | rex_b);
+     _emitU8(0x31);
+     // mod=11b: both operands are registers
+     _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3));
+ }
+ // XOR r/m32, r32 (opcode 0x31): memory operand [memReg + index + offset] on the left, register src on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void XOR_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((src & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if (rex != 0x40) // a REX prefix is only needed when some register is R8-R15
+         _emitU8(rex);
+     _emitU8(0x31);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // XOR r/m64, r64 (REX.W + opcode 0x31): memory operand [memReg + index + offset] on the left, register src on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void XOR_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.W (0x08) selects 64-bit operand size; REX.R extends src, REX.X the index, REX.B the base
+     _emitU8((uint8)(0x48 | ((src & 8) >> 1) | rex_x | ((memReg & 8) >> 3)));
+     _emitU8(0x31);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // XOR r32, r/m32 (opcode 0x33): register dst on the left, memory operand [memReg + index + offset] on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void XOR_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((dst & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if (rex != 0x40) // a REX prefix is only needed when some register is R8-R15
+         _emitU8(rex);
+     _emitU8(0x33);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // XOR r64, r/m64 (REX.W + opcode 0x33): register dst on the left, memory operand [memReg + index + offset] on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void XOR_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.W (0x08) selects 64-bit operand size; REX.R extends dst, REX.X the index, REX.B the base
+     _emitU8((uint8)(0x48 | ((dst & 8) >> 1) | rex_x | ((memReg & 8) >> 3)));
+     _emitU8(0x33);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // CMP r/m8, r8 (opcode 0x38), register-direct form: dst goes into ModRM.rm, src into ModRM.reg.
+ void CMP_bb(GPR8_REX dst, GPR8_REX src)
+ {
+     // Byte registers 4 and above always get a REX prefix (without one, 4-7 would select AH/CH/DH/BH)
+     const bool needs_rex = (dst >= 4) || (src >= 4);
+     if (needs_rex)
+         _emitU8(0x40 | ((src & 8) >> 1) | ((dst & 8) >> 3));
+     _emitU8(0x38);
+     // mod=11b: both operands are registers
+     _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3));
+ }
+ // CMP r/m8, r8 (opcode 0x38): memory operand [memReg + index + offset] on the left, byte register src on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void CMP_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((src & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if ((src >= 4) || rex != 0x40) // byte registers 4+ always take a REX prefix, even an otherwise empty one
+         _emitU8(rex);
+     _emitU8(0x38);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // CMP r8, r/m8 (opcode 0x3A): byte register dst on the left, memory operand [memReg + index + offset] on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void CMP_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((dst & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if ((dst >= 4) || rex != 0x40) // byte registers 4+ always take a REX prefix, even an otherwise empty one
+         _emitU8(rex);
+     _emitU8(0x3a);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // CMP r/m32, r32 (opcode 0x39), register-direct form: dst goes into ModRM.rm, src into ModRM.reg.
+ void CMP_dd(GPR32 dst, GPR32 src)
+ {
+     // REX.B (bit 0) carries bit 3 of dst, REX.R (bit 2) carries bit 3 of src
+     const int rex_ext = ((src & 8) >> 1) | ((dst & 8) >> 3);
+     if (rex_ext != 0) // prefix is only required when R8D-R15D is involved
+         _emitU8(0x40 | rex_ext);
+     _emitU8(0x39);
+     // mod=11b: both operands are registers
+     _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3));
+ }
+ // CMP r/m64, r64 (REX.W + opcode 0x39), register-direct form: dst goes into ModRM.rm, src into ModRM.reg.
+ void CMP_qq(GPR64 dst, GPR64 src)
+ {
+     // REX.W (0x08) is always present for 64-bit operand size; REX.R / REX.B carry bit 3 of src / dst
+     const int rex_r = (src & 8) >> 1;
+     const int rex_b = (dst & 8) >> 3;
+     _emitU8(0x48 | rex_r | rex_b);
+     _emitU8(0x39);
+     // mod=11b: both operands are registers
+     _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3));
+ }
+ // CMP r/m32, r32 (opcode 0x39): memory operand [memReg + index + offset] on the left, register src on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void CMP_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((src & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if (rex != 0x40) // a REX prefix is only needed when some register is R8-R15
+         _emitU8(rex);
+     _emitU8(0x39);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // CMP r/m64, r64 (REX.W + opcode 0x39): memory operand [memReg + index + offset] on the left, register src on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void CMP_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.W (0x08) selects 64-bit operand size; REX.R extends src, REX.X the index, REX.B the base
+     _emitU8((uint8)(0x48 | ((src & 8) >> 1) | rex_x | ((memReg & 8) >> 3)));
+     _emitU8(0x39);
+     _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // CMP r32, r/m32 (opcode 0x3B): register dst on the left, memory operand [memReg + index + offset] on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void CMP_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.R extends the register operand, REX.X the index, REX.B the base (with or without SIB)
+     const uint8 rex = (uint8)(0x40 | ((dst & 8) >> 1) | rex_x | ((memReg & 8) >> 3));
+     if (rex != 0x40) // a REX prefix is only needed when some register is R8-R15
+         _emitU8(rex);
+     _emitU8(0x3b);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ // CMP r64, r/m64 (REX.W + opcode 0x3B): register dst on the left, memory operand [memReg + index + offset] on the right.
+ // Fix: REX.B now extends the base register in the non-SIB form (REX.R was set by mistake), and an
+ // R12 base no longer sets REX.X, which used to turn the "no index" SIB slot into index R12.
+ // NOTE(review): the SIB scale field is always 0 (x1); scaler only decides whether index is encoded.
+ void CMP_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+     uint8 mod;
+     if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b with mod 0 means "no base", so RBP/R13 always carry a displacement
+     else if (offset == (s32)(s8)offset) mod = 1; // displacement fits into a sign-extended byte
+     else mod = 2;
+     bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+     uint8 sib_index = 4; // 100b with REX.X clear encodes "no index"
+     uint8 rex_x = 0;
+     if ((memReg & 7) == 4)
+     {
+         // rm 100b is the SIB escape, so an RSP/R12 base is only reachable through a SIB byte
+         cemu_assert_debug(index == X86_REG_NONE);
+         sib_use = true;
+     }
+     else if (sib_use)
+     {
+         sib_index = (uint8)(index & 7);
+         rex_x = (uint8)((index & 8) >> 2);
+     }
+     // REX.W (0x08) selects 64-bit operand size; REX.R extends dst, REX.X the index, REX.B the base
+     _emitU8((uint8)(0x48 | ((dst & 8) >> 1) | rex_x | ((memReg & 8) >> 3)));
+     _emitU8(0x3b);
+     _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+     if (sib_use)
+         _emitU8((0 << 6) | (memReg & 7) | (sib_index << 3));
+     if (mod == 1) _emitU8((u8)offset);
+     else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADD_di32(GPR32 dst, s32 imm)
+ {
+ 	// ADD r32, imm32 => [REX.B] 81 /0 id
+ 	const bool isExtendedReg = (dst & 8) != 0;
+ 	if (isExtendedReg)
+ 		_emitU8(0x41); // REX prefix with only the B bit set (register numbers 8-15)
+ 	_emitU8(0x81);
+ 	_emitU8(0xC0 | (dst & 7)); // mod=11b (register direct), reg=/0, rm=low three bits of dst
+ 	_emitU32((u32)imm);
+ }
+ void ADD_qi32(GPR64 dst, s32 imm)
+ {
+ 	// ADD r64, imm32 => REX.W[+B] 81 /0 id
+ 	const int rexB = (dst & 8) ? 1 : 0;
+ 	_emitU8(0x48 | rexB);
+ 	_emitU8(0x81);
+ 	_emitU8(0xC0 | (dst & 7)); // mod=11b (register direct), reg=/0, rm=low three bits of dst
+ 	_emitU32((u32)imm);
+ }
+ void ADD_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ 	// ADD dword [memReg + index + offset], imm32 => [REX] 81 /0 [SIB] [disp8|disp32] imm32
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.B extends the base register with or without a SIB byte; REX.X extends the index
+ 	const uint8 rex = ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2);
+ 	if (rex != 0)
+ 		_emitU8(0x40 | rex);
+ 	_emitU8(0x81);
+ 	_emitU8((mod << 6) | (0 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU32((u32)imm);
+ }
+ void ADD_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ 	// ADD qword [memReg + index + offset], imm32 => REX.W 81 /0 [SIB] [disp8|disp32] imm32
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.W = 64-bit operand; REX.B extends the base with or without a SIB byte, REX.X the index
+ 	_emitU8(0x48 | ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2));
+ 	_emitU8(0x81);
+ 	_emitU8((mod << 6) | (0 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU32((u32)imm);
+ }
+ void OR_di32(GPR32 dst, s32 imm)
+ {
+ 	// OR r32, imm32 => [REX.B] 81 /1 id
+ 	const bool isExtendedReg = (dst & 8) != 0;
+ 	if (isExtendedReg)
+ 		_emitU8(0x41); // REX prefix with only the B bit set (register numbers 8-15)
+ 	_emitU8(0x81);
+ 	_emitU8(0xC8 | (dst & 7)); // mod=11b (register direct), reg=/1, rm=low three bits of dst
+ 	_emitU32((u32)imm);
+ }
+ void OR_qi32(GPR64 dst, s32 imm)
+ {
+ 	// OR r64, imm32 => REX.W[+B] 81 /1 id
+ 	const int rexB = (dst & 8) ? 1 : 0;
+ 	_emitU8(0x48 | rexB);
+ 	_emitU8(0x81);
+ 	_emitU8(0xC8 | (dst & 7)); // mod=11b (register direct), reg=/1, rm=low three bits of dst
+ 	_emitU32((u32)imm);
+ }
+ void OR_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ 	// OR dword [memReg + index + offset], imm32 => [REX] 81 /1 [SIB] [disp8|disp32] imm32
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.B extends the base register with or without a SIB byte; REX.X extends the index
+ 	const uint8 rex = ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2);
+ 	if (rex != 0)
+ 		_emitU8(0x40 | rex);
+ 	_emitU8(0x81);
+ 	_emitU8((mod << 6) | (1 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU32((u32)imm);
+ }
+ void OR_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ 	// OR qword [memReg + index + offset], imm32 => REX.W 81 /1 [SIB] [disp8|disp32] imm32
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.W = 64-bit operand; REX.B extends the base with or without a SIB byte, REX.X the index
+ 	_emitU8(0x48 | ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2));
+ 	_emitU8(0x81);
+ 	_emitU8((mod << 6) | (1 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU32((u32)imm);
+ }
+ void ADC_di32(GPR32 dst, s32 imm)
+ {
+ 	// ADC r32, imm32 => [REX.B] 81 /2 id
+ 	const bool isExtendedReg = (dst & 8) != 0;
+ 	if (isExtendedReg)
+ 		_emitU8(0x41); // REX prefix with only the B bit set (register numbers 8-15)
+ 	_emitU8(0x81);
+ 	_emitU8(0xD0 | (dst & 7)); // mod=11b (register direct), reg=/2, rm=low three bits of dst
+ 	_emitU32((u32)imm);
+ }
+ void ADC_qi32(GPR64 dst, s32 imm)
+ {
+ 	// ADC r64, imm32 => REX.W[+B] 81 /2 id
+ 	const int rexB = (dst & 8) ? 1 : 0;
+ 	_emitU8(0x48 | rexB);
+ 	_emitU8(0x81);
+ 	_emitU8(0xD0 | (dst & 7)); // mod=11b (register direct), reg=/2, rm=low three bits of dst
+ 	_emitU32((u32)imm);
+ }
+ void ADC_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ 	// ADC dword [memReg + index + offset], imm32 => [REX] 81 /2 [SIB] [disp8|disp32] imm32
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.B extends the base register with or without a SIB byte; REX.X extends the index
+ 	const uint8 rex = ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2);
+ 	if (rex != 0)
+ 		_emitU8(0x40 | rex);
+ 	_emitU8(0x81);
+ 	_emitU8((mod << 6) | (2 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU32((u32)imm);
+ }
+ void ADC_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ 	// ADC qword [memReg + index + offset], imm32 => REX.W 81 /2 [SIB] [disp8|disp32] imm32
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.W = 64-bit operand; REX.B extends the base with or without a SIB byte, REX.X the index
+ 	_emitU8(0x48 | ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2));
+ 	_emitU8(0x81);
+ 	_emitU8((mod << 6) | (2 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU32((u32)imm);
+ }
+ void SBB_di32(GPR32 dst, s32 imm)
+ {
+ 	// SBB r32, imm32 => [REX.B] 81 /3 id
+ 	const bool isExtendedReg = (dst & 8) != 0;
+ 	if (isExtendedReg)
+ 		_emitU8(0x41); // REX prefix with only the B bit set (register numbers 8-15)
+ 	_emitU8(0x81);
+ 	_emitU8(0xD8 | (dst & 7)); // mod=11b (register direct), reg=/3, rm=low three bits of dst
+ 	_emitU32((u32)imm);
+ }
+ void SBB_qi32(GPR64 dst, s32 imm)
+ {
+ 	// SBB r64, imm32 => REX.W[+B] 81 /3 id
+ 	const int rexB = (dst & 8) ? 1 : 0;
+ 	_emitU8(0x48 | rexB);
+ 	_emitU8(0x81);
+ 	_emitU8(0xD8 | (dst & 7)); // mod=11b (register direct), reg=/3, rm=low three bits of dst
+ 	_emitU32((u32)imm);
+ }
+ void SBB_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ 	// SBB dword [memReg + index + offset], imm32 => [REX] 81 /3 [SIB] [disp8|disp32] imm32
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.B extends the base register with or without a SIB byte; REX.X extends the index
+ 	const uint8 rex = ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2);
+ 	if (rex != 0)
+ 		_emitU8(0x40 | rex);
+ 	_emitU8(0x81);
+ 	_emitU8((mod << 6) | (3 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU32((u32)imm);
+ }
+ void SBB_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ 	// SBB qword [memReg + index + offset], imm32 => REX.W 81 /3 [SIB] [disp8|disp32] imm32
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.W = 64-bit operand; REX.B extends the base with or without a SIB byte, REX.X the index
+ 	_emitU8(0x48 | ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2));
+ 	_emitU8(0x81);
+ 	_emitU8((mod << 6) | (3 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU32((u32)imm);
+ }
+ void AND_di32(GPR32 dst, s32 imm)
+ {
+ 	// AND r32, imm32 => [REX.B] 81 /4 id
+ 	const bool isExtendedReg = (dst & 8) != 0;
+ 	if (isExtendedReg)
+ 		_emitU8(0x41); // REX prefix with only the B bit set (register numbers 8-15)
+ 	_emitU8(0x81);
+ 	_emitU8(0xE0 | (dst & 7)); // mod=11b (register direct), reg=/4, rm=low three bits of dst
+ 	_emitU32((u32)imm);
+ }
+ void AND_qi32(GPR64 dst, s32 imm)
+ {
+ 	// AND r64, imm32 => REX.W[+B] 81 /4 id
+ 	const int rexB = (dst & 8) ? 1 : 0;
+ 	_emitU8(0x48 | rexB);
+ 	_emitU8(0x81);
+ 	_emitU8(0xE0 | (dst & 7)); // mod=11b (register direct), reg=/4, rm=low three bits of dst
+ 	_emitU32((u32)imm);
+ }
+ void AND_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ 	// AND dword [memReg + index + offset], imm32 => [REX] 81 /4 [SIB] [disp8|disp32] imm32
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.B extends the base register with or without a SIB byte; REX.X extends the index
+ 	const uint8 rex = ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2);
+ 	if (rex != 0)
+ 		_emitU8(0x40 | rex);
+ 	_emitU8(0x81);
+ 	_emitU8((mod << 6) | (4 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU32((u32)imm);
+ }
+ void AND_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ 	// AND qword [memReg + index + offset], imm32 => REX.W 81 /4 [SIB] [disp8|disp32] imm32
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.W = 64-bit operand; REX.B extends the base with or without a SIB byte, REX.X the index
+ 	_emitU8(0x48 | ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2));
+ 	_emitU8(0x81);
+ 	_emitU8((mod << 6) | (4 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU32((u32)imm);
+ }
+ void SUB_di32(GPR32 dst, s32 imm)
+ {
+ 	// SUB r32, imm32 => [REX.B] 81 /5 id
+ 	const bool isExtendedReg = (dst & 8) != 0;
+ 	if (isExtendedReg)
+ 		_emitU8(0x41); // REX prefix with only the B bit set (register numbers 8-15)
+ 	_emitU8(0x81);
+ 	_emitU8(0xE8 | (dst & 7)); // mod=11b (register direct), reg=/5, rm=low three bits of dst
+ 	_emitU32((u32)imm);
+ }
+ void SUB_qi32(GPR64 dst, s32 imm)
+ {
+ 	// SUB r64, imm32 => REX.W[+B] 81 /5 id
+ 	const int rexB = (dst & 8) ? 1 : 0;
+ 	_emitU8(0x48 | rexB);
+ 	_emitU8(0x81);
+ 	_emitU8(0xE8 | (dst & 7)); // mod=11b (register direct), reg=/5, rm=low three bits of dst
+ 	_emitU32((u32)imm);
+ }
+ void SUB_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ 	// SUB dword [memReg + index + offset], imm32 => [REX] 81 /5 [SIB] [disp8|disp32] imm32
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.B extends the base register with or without a SIB byte; REX.X extends the index
+ 	const uint8 rex = ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2);
+ 	if (rex != 0)
+ 		_emitU8(0x40 | rex);
+ 	_emitU8(0x81);
+ 	_emitU8((mod << 6) | (5 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU32((u32)imm);
+ }
+ void SUB_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ 	// SUB qword [memReg + index + offset], imm32 => REX.W 81 /5 [SIB] [disp8|disp32] imm32
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.W = 64-bit operand; REX.B extends the base with or without a SIB byte, REX.X the index
+ 	_emitU8(0x48 | ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2));
+ 	_emitU8(0x81);
+ 	_emitU8((mod << 6) | (5 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU32((u32)imm);
+ }
+ void XOR_di32(GPR32 dst, s32 imm)
+ {
+ 	// XOR r32, imm32 => [REX.B] 81 /6 id
+ 	const bool isExtendedReg = (dst & 8) != 0;
+ 	if (isExtendedReg)
+ 		_emitU8(0x41); // REX prefix with only the B bit set (register numbers 8-15)
+ 	_emitU8(0x81);
+ 	_emitU8(0xF0 | (dst & 7)); // mod=11b (register direct), reg=/6, rm=low three bits of dst
+ 	_emitU32((u32)imm);
+ }
+ void XOR_qi32(GPR64 dst, s32 imm)
+ {
+ 	// XOR r64, imm32 => REX.W[+B] 81 /6 id
+ 	const int rexB = (dst & 8) ? 1 : 0;
+ 	_emitU8(0x48 | rexB);
+ 	_emitU8(0x81);
+ 	_emitU8(0xF0 | (dst & 7)); // mod=11b (register direct), reg=/6, rm=low three bits of dst
+ 	_emitU32((u32)imm);
+ }
+ void XOR_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ 	// XOR dword [memReg + index + offset], imm32 => [REX] 81 /6 [SIB] [disp8|disp32] imm32
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.B extends the base register with or without a SIB byte; REX.X extends the index
+ 	const uint8 rex = ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2);
+ 	if (rex != 0)
+ 		_emitU8(0x40 | rex);
+ 	_emitU8(0x81);
+ 	_emitU8((mod << 6) | (6 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU32((u32)imm);
+ }
+ void XOR_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ 	// XOR qword [memReg + index + offset], imm32 => REX.W 81 /6 [SIB] [disp8|disp32] imm32
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.W = 64-bit operand; REX.B extends the base with or without a SIB byte, REX.X the index
+ 	_emitU8(0x48 | ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2));
+ 	_emitU8(0x81);
+ 	_emitU8((mod << 6) | (6 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU32((u32)imm);
+ }
+ void CMP_di32(GPR32 dst, s32 imm)
+ {
+ 	// CMP r32, imm32 => [REX.B] 81 /7 id
+ 	const bool isExtendedReg = (dst & 8) != 0;
+ 	if (isExtendedReg)
+ 		_emitU8(0x41); // REX prefix with only the B bit set (register numbers 8-15)
+ 	_emitU8(0x81);
+ 	_emitU8(0xF8 | (dst & 7)); // mod=11b (register direct), reg=/7, rm=low three bits of dst
+ 	_emitU32((u32)imm);
+ }
+ void CMP_qi32(GPR64 dst, s32 imm)
+ {
+ 	// CMP r64, imm32 => REX.W[+B] 81 /7 id
+ 	const int rexB = (dst & 8) ? 1 : 0;
+ 	_emitU8(0x48 | rexB);
+ 	_emitU8(0x81);
+ 	_emitU8(0xF8 | (dst & 7)); // mod=11b (register direct), reg=/7, rm=low three bits of dst
+ 	_emitU32((u32)imm);
+ }
+ void CMP_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ 	// CMP dword [memReg + index + offset], imm32 => [REX] 81 /7 [SIB] [disp8|disp32] imm32
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.B extends the base register with or without a SIB byte; REX.X extends the index
+ 	const uint8 rex = ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2);
+ 	if (rex != 0)
+ 		_emitU8(0x40 | rex);
+ 	_emitU8(0x81);
+ 	_emitU8((mod << 6) | (7 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU32((u32)imm);
+ }
+ void CMP_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ 	// CMP qword [memReg + index + offset], imm32 => REX.W 81 /7 [SIB] [disp8|disp32] imm32
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.W = 64-bit operand; REX.B extends the base with or without a SIB byte, REX.X the index
+ 	_emitU8(0x48 | ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2));
+ 	_emitU8(0x81);
+ 	_emitU8((mod << 6) | (7 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU32((u32)imm);
+ }
+ void ADD_di8(GPR32 dst, s8 imm)
+ {
+ 	// ADD r32, imm8 => [REX.B] 83 /0 ib
+ 	const bool isExtendedReg = (dst & 8) != 0;
+ 	if (isExtendedReg)
+ 		_emitU8(0x41); // REX prefix with only the B bit set (register numbers 8-15)
+ 	_emitU8(0x83);
+ 	_emitU8(0xC0 | (dst & 7)); // mod=11b (register direct), reg=/0, rm=low three bits of dst
+ 	_emitU8((u8)imm);
+ }
+ void ADD_qi8(GPR64 dst, s8 imm)
+ {
+ 	// ADD r64, imm8 => REX.W[+B] 83 /0 ib
+ 	const int rexB = (dst & 8) ? 1 : 0;
+ 	_emitU8(0x48 | rexB);
+ 	_emitU8(0x83);
+ 	_emitU8(0xC0 | (dst & 7)); // mod=11b (register direct), reg=/0, rm=low three bits of dst
+ 	_emitU8((u8)imm);
+ }
+ void ADD_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ 	// ADD dword [memReg + index + offset], imm8 => [REX] 83 /0 [SIB] [disp8|disp32] imm8
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.B extends the base register with or without a SIB byte; REX.X extends the index
+ 	const uint8 rex = ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2);
+ 	if (rex != 0)
+ 		_emitU8(0x40 | rex);
+ 	_emitU8(0x83);
+ 	_emitU8((mod << 6) | (0 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU8((u8)imm);
+ }
+ void ADD_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ 	// ADD qword [memReg + index + offset], imm8 => REX.W 83 /0 [SIB] [disp8|disp32] imm8
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.W = 64-bit operand; REX.B extends the base with or without a SIB byte, REX.X the index
+ 	_emitU8(0x48 | ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2));
+ 	_emitU8(0x83);
+ 	_emitU8((mod << 6) | (0 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU8((u8)imm);
+ }
+ void OR_di8(GPR32 dst, s8 imm)
+ {
+ 	// OR r32, imm8 => [REX.B] 83 /1 ib
+ 	const bool isExtendedReg = (dst & 8) != 0;
+ 	if (isExtendedReg)
+ 		_emitU8(0x41); // REX prefix with only the B bit set (register numbers 8-15)
+ 	_emitU8(0x83);
+ 	_emitU8(0xC8 | (dst & 7)); // mod=11b (register direct), reg=/1, rm=low three bits of dst
+ 	_emitU8((u8)imm);
+ }
+ void OR_qi8(GPR64 dst, s8 imm)
+ {
+ 	// OR r64, imm8 => REX.W[+B] 83 /1 ib
+ 	const int rexB = (dst & 8) ? 1 : 0;
+ 	_emitU8(0x48 | rexB);
+ 	_emitU8(0x83);
+ 	_emitU8(0xC8 | (dst & 7)); // mod=11b (register direct), reg=/1, rm=low three bits of dst
+ 	_emitU8((u8)imm);
+ }
+ void OR_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ 	// OR dword [memReg + index + offset], imm8 => [REX] 83 /1 [SIB] [disp8|disp32] imm8
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.B extends the base register with or without a SIB byte; REX.X extends the index
+ 	const uint8 rex = ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2);
+ 	if (rex != 0)
+ 		_emitU8(0x40 | rex);
+ 	_emitU8(0x83);
+ 	_emitU8((mod << 6) | (1 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU8((u8)imm);
+ }
+ void OR_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ 	// OR qword [memReg + index + offset], imm8 => REX.W 83 /1 [SIB] [disp8|disp32] imm8
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.W = 64-bit operand; REX.B extends the base with or without a SIB byte, REX.X the index
+ 	_emitU8(0x48 | ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2));
+ 	_emitU8(0x83);
+ 	_emitU8((mod << 6) | (1 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU8((u8)imm);
+ }
+ void ADC_di8(GPR32 dst, s8 imm)
+ {
+ 	// ADC r32, imm8 => [REX.B] 83 /2 ib
+ 	const bool isExtendedReg = (dst & 8) != 0;
+ 	if (isExtendedReg)
+ 		_emitU8(0x41); // REX prefix with only the B bit set (register numbers 8-15)
+ 	_emitU8(0x83);
+ 	_emitU8(0xD0 | (dst & 7)); // mod=11b (register direct), reg=/2, rm=low three bits of dst
+ 	_emitU8((u8)imm);
+ }
+ void ADC_qi8(GPR64 dst, s8 imm)
+ {
+ 	// ADC r64, imm8 => REX.W[+B] 83 /2 ib
+ 	const int rexB = (dst & 8) ? 1 : 0;
+ 	_emitU8(0x48 | rexB);
+ 	_emitU8(0x83);
+ 	_emitU8(0xD0 | (dst & 7)); // mod=11b (register direct), reg=/2, rm=low three bits of dst
+ 	_emitU8((u8)imm);
+ }
+ void ADC_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ 	// ADC dword [memReg + index + offset], imm8 => [REX] 83 /2 [SIB] [disp8|disp32] imm8
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.B extends the base register with or without a SIB byte; REX.X extends the index
+ 	const uint8 rex = ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2);
+ 	if (rex != 0)
+ 		_emitU8(0x40 | rex);
+ 	_emitU8(0x83);
+ 	_emitU8((mod << 6) | (2 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU8((u8)imm);
+ }
+ void ADC_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ 	// ADC qword [memReg + index + offset], imm8 => REX.W 83 /2 [SIB] [disp8|disp32] imm8
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.W = 64-bit operand; REX.B extends the base with or without a SIB byte, REX.X the index
+ 	_emitU8(0x48 | ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2));
+ 	_emitU8(0x83);
+ 	_emitU8((mod << 6) | (2 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU8((u8)imm);
+ }
+ void SBB_di8(GPR32 dst, s8 imm)
+ {
+ 	// SBB r32, imm8 => [REX.B] 83 /3 ib
+ 	const bool isExtendedReg = (dst & 8) != 0;
+ 	if (isExtendedReg)
+ 		_emitU8(0x41); // REX prefix with only the B bit set (register numbers 8-15)
+ 	_emitU8(0x83);
+ 	_emitU8(0xD8 | (dst & 7)); // mod=11b (register direct), reg=/3, rm=low three bits of dst
+ 	_emitU8((u8)imm);
+ }
+ void SBB_qi8(GPR64 dst, s8 imm)
+ {
+ 	// SBB r64, imm8 => REX.W[+B] 83 /3 ib
+ 	const int rexB = (dst & 8) ? 1 : 0;
+ 	_emitU8(0x48 | rexB);
+ 	_emitU8(0x83);
+ 	_emitU8(0xD8 | (dst & 7)); // mod=11b (register direct), reg=/3, rm=low three bits of dst
+ 	_emitU8((u8)imm);
+ }
+ void SBB_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ 	// SBB dword [memReg + index + offset], imm8 => [REX] 83 /3 [SIB] [disp8|disp32] imm8
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.B extends the base register with or without a SIB byte; REX.X extends the index
+ 	const uint8 rex = ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2);
+ 	if (rex != 0)
+ 		_emitU8(0x40 | rex);
+ 	_emitU8(0x83);
+ 	_emitU8((mod << 6) | (3 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU8((u8)imm);
+ }
+ void SBB_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ 	// SBB qword [memReg + index + offset], imm8 => REX.W 83 /3 [SIB] [disp8|disp32] imm8
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.W = 64-bit operand; REX.B extends the base with or without a SIB byte, REX.X the index
+ 	_emitU8(0x48 | ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2));
+ 	_emitU8(0x83);
+ 	_emitU8((mod << 6) | (3 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU8((u8)imm);
+ }
+ void AND_di8(GPR32 dst, s8 imm)
+ {
+ 	// AND r32, imm8 => [REX.B] 83 /4 ib
+ 	const bool isExtendedReg = (dst & 8) != 0;
+ 	if (isExtendedReg)
+ 		_emitU8(0x41); // REX prefix with only the B bit set (register numbers 8-15)
+ 	_emitU8(0x83);
+ 	_emitU8(0xE0 | (dst & 7)); // mod=11b (register direct), reg=/4, rm=low three bits of dst
+ 	_emitU8((u8)imm);
+ }
+ void AND_qi8(GPR64 dst, s8 imm)
+ {
+ 	// AND r64, imm8 => REX.W[+B] 83 /4 ib
+ 	const int rexB = (dst & 8) ? 1 : 0;
+ 	_emitU8(0x48 | rexB);
+ 	_emitU8(0x83);
+ 	_emitU8(0xE0 | (dst & 7)); // mod=11b (register direct), reg=/4, rm=low three bits of dst
+ 	_emitU8((u8)imm);
+ }
+ void AND_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ 	// AND dword [memReg + index + offset], imm8 => [REX] 83 /4 [SIB] [disp8|disp32] imm8
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.B extends the base register with or without a SIB byte; REX.X extends the index
+ 	const uint8 rex = ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2);
+ 	if (rex != 0)
+ 		_emitU8(0x40 | rex);
+ 	_emitU8(0x83);
+ 	_emitU8((mod << 6) | (4 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU8((u8)imm);
+ }
+ void AND_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ 	// AND qword [memReg + index + offset], imm8 => REX.W 83 /4 [SIB] [disp8|disp32] imm8
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.W = 64-bit operand; REX.B extends the base with or without a SIB byte, REX.X the index
+ 	_emitU8(0x48 | ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2));
+ 	_emitU8(0x83);
+ 	_emitU8((mod << 6) | (4 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU8((u8)imm);
+ }
+ void SUB_di8(GPR32 dst, s8 imm)
+ {
+ 	// SUB r32, imm8 => [REX.B] 83 /5 ib
+ 	const bool isExtendedReg = (dst & 8) != 0;
+ 	if (isExtendedReg)
+ 		_emitU8(0x41); // REX prefix with only the B bit set (register numbers 8-15)
+ 	_emitU8(0x83);
+ 	_emitU8(0xE8 | (dst & 7)); // mod=11b (register direct), reg=/5, rm=low three bits of dst
+ 	_emitU8((u8)imm);
+ }
+ void SUB_qi8(GPR64 dst, s8 imm)
+ {
+ 	// SUB r64, imm8 => REX.W[+B] 83 /5 ib
+ 	const int rexB = (dst & 8) ? 1 : 0;
+ 	_emitU8(0x48 | rexB);
+ 	_emitU8(0x83);
+ 	_emitU8(0xE8 | (dst & 7)); // mod=11b (register direct), reg=/5, rm=low three bits of dst
+ 	_emitU8((u8)imm);
+ }
+ void SUB_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ 	// SUB dword [memReg + index + offset], imm8 => [REX] 83 /5 [SIB] [disp8|disp32] imm8
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.B extends the base register with or without a SIB byte; REX.X extends the index
+ 	const uint8 rex = ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2);
+ 	if (rex != 0)
+ 		_emitU8(0x40 | rex);
+ 	_emitU8(0x83);
+ 	_emitU8((mod << 6) | (5 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU8((u8)imm);
+ }
+ void SUB_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ 	// SUB qword [memReg + index + offset], imm8 => REX.W 83 /5 [SIB] [disp8|disp32] imm8
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.W = 64-bit operand; REX.B extends the base with or without a SIB byte, REX.X the index
+ 	_emitU8(0x48 | ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2));
+ 	_emitU8(0x83);
+ 	_emitU8((mod << 6) | (5 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU8((u8)imm);
+ }
+ void XOR_di8(GPR32 dst, s8 imm)
+ {
+ 	// XOR r32, imm8 => [REX.B] 83 /6 ib
+ 	const bool isExtendedReg = (dst & 8) != 0;
+ 	if (isExtendedReg)
+ 		_emitU8(0x41); // REX prefix with only the B bit set (register numbers 8-15)
+ 	_emitU8(0x83);
+ 	_emitU8(0xF0 | (dst & 7)); // mod=11b (register direct), reg=/6, rm=low three bits of dst
+ 	_emitU8((u8)imm);
+ }
+ void XOR_qi8(GPR64 dst, s8 imm)
+ {
+ 	// XOR r64, imm8 => REX.W[+B] 83 /6 ib
+ 	const int rexB = (dst & 8) ? 1 : 0;
+ 	_emitU8(0x48 | rexB);
+ 	_emitU8(0x83);
+ 	_emitU8(0xF0 | (dst & 7)); // mod=11b (register direct), reg=/6, rm=low three bits of dst
+ 	_emitU8((u8)imm);
+ }
+ void XOR_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ 	// XOR dword [memReg + index + offset], imm8 => [REX] 83 /6 [SIB] [disp8|disp32] imm8
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.B extends the base register with or without a SIB byte; REX.X extends the index
+ 	const uint8 rex = ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2);
+ 	if (rex != 0)
+ 		_emitU8(0x40 | rex);
+ 	_emitU8(0x83);
+ 	_emitU8((mod << 6) | (6 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU8((u8)imm);
+ }
+ void XOR_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ 	// XOR qword [memReg + index + offset], imm8 => REX.W 83 /6 [SIB] [disp8|disp32] imm8
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.W = 64-bit operand; REX.B extends the base with or without a SIB byte, REX.X the index
+ 	_emitU8(0x48 | ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2));
+ 	_emitU8(0x83);
+ 	_emitU8((mod << 6) | (6 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU8((u8)imm);
+ }
+ void CMP_di8(GPR32 dst, s8 imm)
+ {
+ 	// CMP r32, imm8 => [REX.B] 83 /7 ib
+ 	const bool isExtendedReg = (dst & 8) != 0;
+ 	if (isExtendedReg)
+ 		_emitU8(0x41); // REX prefix with only the B bit set (register numbers 8-15)
+ 	_emitU8(0x83);
+ 	_emitU8(0xF8 | (dst & 7)); // mod=11b (register direct), reg=/7, rm=low three bits of dst
+ 	_emitU8((u8)imm);
+ }
+ void CMP_qi8(GPR64 dst, s8 imm)
+ {
+ 	// CMP r64, imm8 => REX.W[+B] 83 /7 ib
+ 	const int rexB = (dst & 8) ? 1 : 0;
+ 	_emitU8(0x48 | rexB);
+ 	_emitU8(0x83);
+ 	_emitU8(0xF8 | (dst & 7)); // mod=11b (register direct), reg=/7, rm=low three bits of dst
+ 	_emitU8((u8)imm);
+ }
+ void CMP_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ 	// CMP dword [memReg + index + offset], imm8 => [REX] 83 /7 [SIB] [disp8|disp32] imm8
+ 	// An index is encoded only if scaler != 0 and index != X86_REG_NONE; SIB scale bits are always 0 (scaler is not encoded).
+ 	uint8 mod; // 0 = no displacement, 1 = disp8, 2 = disp32
+ 	if (offset == 0 && (memReg & 7) != 5) mod = 0; // a base with low bits 101b cannot use mod=0
+ 	else if (offset == (s32)(s8)offset) mod = 1;
+ 	else mod = 2;
+ 	uint8 sibIndex = 4; // index field 100b with REX.X clear means "no index register"
+ 	bool sib_use = false;
+ 	if ((memReg & 7) == 4)
+ 	{
+ 		// rm=100b is the SIB escape, so such a base always needs an (index-less) SIB byte
+ 		cemu_assert_debug(index == X86_REG_NONE);
+ 		sib_use = true;
+ 	}
+ 	else if (scaler != 0 && index != X86_REG_NONE)
+ 	{
+ 		sibIndex = (uint8)index;
+ 		sib_use = true;
+ 	}
+ 	// REX.B extends the base register with or without a SIB byte; REX.X extends the index
+ 	const uint8 rex = ((memReg & 8) >> 3) | ((sibIndex & 8) >> 2);
+ 	if (rex != 0)
+ 		_emitU8(0x40 | rex);
+ 	_emitU8(0x83);
+ 	_emitU8((mod << 6) | (7 << 3) | (sib_use ? 4 : (memReg & 7)));
+ 	if (sib_use)
+ 		_emitU8(((sibIndex & 7) << 3) | (memReg & 7));
+ 	if (mod == 1) _emitU8((u8)offset);
+ 	else if (mod == 2) _emitU32((u32)offset);
+ 	_emitU8((u8)imm);
+ }
+ void CMP_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) // CMP qword [memReg (+ index) + offset], imm8 (REX.W 83 /7 ib)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | 0x08); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x83);
+ _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void TEST_bb(GPR8_REX dst, GPR8_REX src) // TEST r/m8, r8 (84 /r), register-direct form
+ {
+ const bool needsRex = (dst >= 4) || (src >= 4); // any 8-bit register numbered 4 or above gets a REX prefix
+ const uint8 modrm = 0xC0 | ((src & 7) << 3) | (dst & 7); // mod=11b (register direct), reg=src, rm=dst
+ if (needsRex)
+ _emitU8(0x40 | ((src & 8) >> 1) | ((dst & 8) >> 3)); // src -> REX.R, dst -> REX.B
+ _emitU8(0x84);
+ _emitU8(modrm);
+ }
+ void TEST_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) // TEST byte [memReg (+ index) + offset], src (84 /r)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x84);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void TEST_dd(GPR32 dst, GPR32 src) // TEST r/m32, r32 (85 /r), register-direct form
+ {
+ const uint8 rexBits = ((dst & 8) >> 3) | ((src & 8) >> 1); // dst -> ModRM.rm (REX.B), src -> ModRM.reg (REX.R)
+ const uint8 modrm = 0xC0 | ((src & 7) << 3) | (dst & 7); // mod=11b (register direct)
+ if (rexBits != 0)
+ _emitU8(0x40 | rexBits); // prefix is only emitted when it carries a bit
+ _emitU8(0x85);
+ _emitU8(modrm);
+ }
+ void TEST_qq(GPR64 dst, GPR64 src) // TEST r/m64, r64 (REX.W 85 /r), register-direct form
+ {
+ _emitU8(0x48 | ((src & 8) >> 1) | ((dst & 8) >> 3)); // REX.W; src -> REX.R, dst -> REX.B
+ _emitU8(0x85);
+ _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3)); // mod=11b (register direct), reg=src, rm=dst
+ }
+ void TEST_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) // TEST dword [memReg (+ index) + offset], src (85 /r)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src & 8) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x85);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void TEST_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) // TEST qword [memReg (+ index) + offset], src (REX.W 85 /r)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | 0x08); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x85);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void XCHG_bb(GPR8_REX dst, GPR8_REX src) // XCHG between two 8-bit registers (86 /r), register-direct form
+ {
+ const bool needsRex = (src >= 4) || (dst >= 4); // any 8-bit register numbered 4 or above gets a REX prefix
+ const uint8 modrm = 0xC0 | ((dst & 7) << 3) | (src & 7); // mod=11b (register direct), reg=dst, rm=src
+ if (needsRex)
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((src & 8) >> 3)); // dst -> REX.R, src -> REX.B
+ _emitU8(0x86);
+ _emitU8(modrm);
+ }
+ void XCHG_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // XCHG dst, byte [memReg (+ index) + offset] (86 /r)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x86);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void XCHG_dd(GPR32 dst, GPR32 src) // XCHG between two 32-bit registers (87 /r), register-direct form
+ {
+ const uint8 rexBits = ((src & 8) >> 3) | ((dst & 8) >> 1); // src -> ModRM.rm (REX.B), dst -> ModRM.reg (REX.R)
+ const uint8 modrm = 0xC0 | ((dst & 7) << 3) | (src & 7); // mod=11b (register direct)
+ if (rexBits != 0)
+ _emitU8(0x40 | rexBits); // prefix is only emitted when it carries a bit
+ _emitU8(0x87);
+ _emitU8(modrm);
+ }
+ void XCHG_qq(GPR64 dst, GPR64 src) // XCHG between two 64-bit registers (REX.W 87 /r), register-direct form
+ {
+ _emitU8(0x48 | ((dst & 8) >> 1) | ((src & 8) >> 3)); // REX.W; dst -> REX.R, src -> REX.B
+ _emitU8(0x87);
+ _emitU8(0xC0 | (src & 7) | ((dst & 7) << 3)); // mod=11b (register direct), reg=dst, rm=src
+ }
+ void XCHG_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // XCHG dst, dword [memReg (+ index) + offset] (87 /r)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst & 8) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x87);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void XCHG_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // XCHG dst, qword [memReg (+ index) + offset] (REX.W 87 /r)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | 0x08); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x87);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void MOV_bb(GPR8_REX dst, GPR8_REX src) // MOV r/m8, r8 (88 /r), register-direct form
+ {
+ const bool needsRex = (dst >= 4) || (src >= 4); // any 8-bit register numbered 4 or above gets a REX prefix
+ const uint8 modrm = 0xC0 | ((src & 7) << 3) | (dst & 7); // mod=11b (register direct), reg=src, rm=dst
+ if (needsRex)
+ _emitU8(0x40 | ((src & 8) >> 1) | ((dst & 8) >> 3)); // src -> REX.R, dst -> REX.B
+ _emitU8(0x88);
+ _emitU8(modrm);
+ }
+ void MOV_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) // MOV byte [memReg (+ index) + offset], src (88 /r)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x88);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void MOV_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // MOV dst, byte [memReg (+ index) + offset] (8A /r)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x8a);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void MOV_dd(GPR32 dst, GPR32 src) // MOV r/m32, r32 (89 /r), register-direct form
+ {
+ const uint8 rexBits = ((dst & 8) >> 3) | ((src & 8) >> 1); // dst -> ModRM.rm (REX.B), src -> ModRM.reg (REX.R)
+ const uint8 modrm = 0xC0 | ((src & 7) << 3) | (dst & 7); // mod=11b (register direct)
+ if (rexBits != 0)
+ _emitU8(0x40 | rexBits); // prefix is only emitted when it carries a bit
+ _emitU8(0x89);
+ _emitU8(modrm);
+ }
+ void MOV_qq(GPR64 dst, GPR64 src) // MOV r/m64, r64 (REX.W 89 /r), register-direct form
+ {
+ _emitU8(0x48 | ((src & 8) >> 1) | ((dst & 8) >> 3)); // REX.W; src -> REX.R, dst -> REX.B
+ _emitU8(0x89);
+ _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3)); // mod=11b (register direct), reg=src, rm=dst
+ }
+ void MOV_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) // MOV dword [memReg (+ index) + offset], src (89 /r)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src & 8) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x89);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void MOV_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) // MOV qword [memReg (+ index) + offset], src (REX.W 89 /r)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | 0x08); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x89);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void MOV_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // MOV dst, dword [memReg (+ index) + offset] (8B /r)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst & 8) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x8b);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void MOV_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // MOV dst, qword [memReg (+ index) + offset] (REX.W 8B /r)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | 0x08); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x8b);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void MOV_di32(GPR32 dst, s32 imm) // MOV r32, imm32 (B8+rd id)
+ {
+ const uint8 rexB = (dst & 8) >> 3; // bit 3 of the register number travels in REX.B
+ const uint8 opcode = 0xb8 | (dst & 7); // low three register bits are embedded in the opcode byte
+ if (rexB != 0)
+ _emitU8(0x40 | rexB); // prefix is only emitted when it carries a bit
+ _emitU8(opcode);
+ _emitU32(static_cast<u32>(imm));
+ }
+ void MOV_qi64(GPR64 dst, s64 imm) // MOV r64, imm64 (REX.W B8+rd io)
+ {
+ _emitU8((dst & 8) ? 0x49 : 0x48); // REX.W, plus REX.B when bit 3 of dst is set
+ _emitU8(0xb8 | (dst & 7)); // low three register bits are embedded in the opcode byte
+ _emitU64(static_cast<u64>(imm));
+ }
+ void CALL_q(GPR64 dst) // CALL r/m64 (FF /2), register-direct form
+ {
+ const uint8 rexB = (dst & 8) >> 3; // bit 3 of the register number travels in REX.B
+ const uint8 modrm = 0xC0 | (2 << 3) | (dst & 7); // mod=11b (register direct), reg field = /2
+ if (rexB != 0)
+ _emitU8(0x40 | rexB); // no REX.W is emitted for this form
+ _emitU8(0xff);
+ _emitU8(modrm);
+ }
+ void CALL_q_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // CALL qword [memReg (+ index) + offset] (FF /2)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0xff);
+ _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void IMUL_ddi32(GPR32 dst, GPR32 src, s32 imm) // IMUL r32, r/m32, imm32 (69 /r id), register-direct form
+ {
+ const uint8 rexBits = ((src & 8) >> 3) | ((dst & 8) >> 1); // src -> ModRM.rm (REX.B), dst -> ModRM.reg (REX.R)
+ const uint8 modrm = 0xC0 | ((dst & 7) << 3) | (src & 7); // mod=11b (register direct)
+ if (rexBits != 0)
+ _emitU8(0x40 | rexBits); // prefix is only emitted when it carries a bit
+ _emitU8(0x69);
+ _emitU8(modrm);
+ _emitU32(static_cast<u32>(imm));
+ }
+ void IMUL_qqi32(GPR64 dst, GPR64 src, s32 imm) // IMUL r64, r/m64, imm32 (REX.W 69 /r id), register-direct form
+ {
+ _emitU8(0x48 | ((dst & 8) >> 1) | ((src & 8) >> 3)); // REX.W; dst -> REX.R, src -> REX.B
+ _emitU8(0x69);
+ _emitU8(0xC0 | (src & 7) | ((dst & 7) << 3)); // mod=11b (register direct), reg=dst, rm=src
+ _emitU32(static_cast<u32>(imm));
+ }
+ void IMUL_ddi32_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) // IMUL dst, dword [memReg (+ index) + offset], imm32 (69 /r id)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst & 8) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x69);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
+ void IMUL_qqi32_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) // IMUL dst, qword [memReg (+ index) + offset], imm32 (REX.W 69 /r id)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | 0x08); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x69);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
+ void IMUL_ddi8(GPR32 dst, GPR32 src, s8 imm) // IMUL r32, r/m32, imm8 (6B /r ib), register-direct form
+ {
+ const uint8 rexBits = ((src & 8) >> 3) | ((dst & 8) >> 1); // src -> ModRM.rm (REX.B), dst -> ModRM.reg (REX.R)
+ const uint8 modrm = 0xC0 | ((dst & 7) << 3) | (src & 7); // mod=11b (register direct)
+ if (rexBits != 0)
+ _emitU8(0x40 | rexBits); // prefix is only emitted when it carries a bit
+ _emitU8(0x6b);
+ _emitU8(modrm);
+ _emitU8(static_cast<u8>(imm));
+ }
+ void IMUL_qqi8(GPR64 dst, GPR64 src, s8 imm) // IMUL r64, r/m64, imm8 (REX.W 6B /r ib), register-direct form
+ {
+ _emitU8(0x48 | ((dst & 8) >> 1) | ((src & 8) >> 3)); // REX.W; dst -> REX.R, src -> REX.B
+ _emitU8(0x6b);
+ _emitU8(0xC0 | (src & 7) | ((dst & 7) << 3)); // mod=11b (register direct), reg=dst, rm=src
+ _emitU8(static_cast<u8>(imm));
+ }
+ void IMUL_ddi8_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) // IMUL dst, dword [memReg (+ index) + offset], imm8 (6B /r ib)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst & 8) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x6b);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void IMUL_qqi8_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) // IMUL dst, qword [memReg (+ index) + offset], imm8 (REX.W 6B /r ib)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | 0x08); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x6b);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void SHL_b_CL(GPR8_REX dst) // SHL r/m8, CL (D2 /4), register-direct form
+ {
+ const bool needsRex = (dst >= 4); // any 8-bit register numbered 4 or above gets a REX prefix
+ const uint8 modrm = 0xC0 | (4 << 3) | (dst & 7); // mod=11b (register direct), reg field = /4
+ if (needsRex)
+ _emitU8(0x40 | ((dst & 8) >> 3)); // bit 3 of the register number travels in REX.B
+ _emitU8(0xd2);
+ _emitU8(modrm);
+ }
+ void SHL_b_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // SHL byte [memReg (+ index) + offset], CL (D2 /4)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0xd2);
+ _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SHR_b_CL(GPR8_REX dst) // SHR r/m8, CL (D2 /5), register-direct form
+ {
+ const bool needsRex = (dst >= 4); // any 8-bit register numbered 4 or above gets a REX prefix
+ const uint8 modrm = 0xC0 | (5 << 3) | (dst & 7); // mod=11b (register direct), reg field = /5
+ if (needsRex)
+ _emitU8(0x40 | ((dst & 8) >> 3)); // bit 3 of the register number travels in REX.B
+ _emitU8(0xd2);
+ _emitU8(modrm);
+ }
+ void SHR_b_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // SHR byte [memReg (+ index) + offset], CL (D2 /5)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0xd2);
+ _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SAR_b_CL(GPR8_REX dst) // SAR r/m8, CL (D2 /7), register-direct form
+ {
+ const bool needsRex = (dst >= 4); // any 8-bit register numbered 4 or above gets a REX prefix
+ const uint8 modrm = 0xC0 | (7 << 3) | (dst & 7); // mod=11b (register direct), reg field = /7
+ if (needsRex)
+ _emitU8(0x40 | ((dst & 8) >> 3)); // bit 3 of the register number travels in REX.B
+ _emitU8(0xd2);
+ _emitU8(modrm);
+ }
+ void SAR_b_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // SAR byte [memReg (+ index) + offset], CL (D2 /7)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0xd2);
+ _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SHL_d_CL(GPR32 dst) // SHL r/m32, CL (D3 /4), register-direct form
+ {
+ const uint8 rexB = (dst & 8) >> 3; // bit 3 of the register number travels in REX.B
+ const uint8 modrm = 0xC0 | (4 << 3) | (dst & 7); // mod=11b (register direct), reg field = /4
+ if (rexB != 0)
+ _emitU8(0x40 | rexB); // prefix is only emitted when it carries a bit
+ _emitU8(0xd3);
+ _emitU8(modrm);
+ }
+ void SHL_q_CL(GPR64 dst) // SHL r/m64, CL (REX.W D3 /4), register-direct form
+ {
+ _emitU8((dst & 8) ? 0x49 : 0x48); // REX.W, plus REX.B when bit 3 of dst is set
+ _emitU8(0xd3);
+ _emitU8(0xC0 | (4 << 3) | (dst & 7)); // mod=11b (register direct), reg field = /4
+ }
+ void SHL_d_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // SHL dword [memReg (+ index) + offset], CL (D3 /4)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0xd3);
+ _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SHL_q_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // SHL qword [memReg (+ index) + offset], CL (REX.W D3 /4)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | 0x08); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0xd3);
+ _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SHR_d_CL(GPR32 dst) // SHR r/m32, CL (D3 /5), register-direct form
+ {
+ const uint8 rexB = (dst & 8) >> 3; // bit 3 of the register number travels in REX.B
+ const uint8 modrm = 0xC0 | (5 << 3) | (dst & 7); // mod=11b (register direct), reg field = /5
+ if (rexB != 0)
+ _emitU8(0x40 | rexB); // prefix is only emitted when it carries a bit
+ _emitU8(0xd3);
+ _emitU8(modrm);
+ }
+ void SHR_q_CL(GPR64 dst) // SHR r/m64, CL (REX.W D3 /5), register-direct form
+ {
+ _emitU8((dst & 8) ? 0x49 : 0x48); // REX.W, plus REX.B when bit 3 of dst is set
+ _emitU8(0xd3);
+ _emitU8(0xC0 | (5 << 3) | (dst & 7)); // mod=11b (register direct), reg field = /5
+ }
+ void SHR_d_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // SHR dword [memReg (+ index) + offset], CL (D3 /5)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0xd3);
+ _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SHR_q_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // SHR qword [memReg (+ index) + offset], CL (REX.W D3 /5)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | 0x08); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0xd3);
+ _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SAR_d_CL(GPR32 dst) // SAR r/m32, CL (D3 /7), register-direct form
+ {
+ const uint8 rexB = (dst & 8) >> 3; // bit 3 of the register number travels in REX.B
+ const uint8 modrm = 0xC0 | (7 << 3) | (dst & 7); // mod=11b (register direct), reg field = /7
+ if (rexB != 0)
+ _emitU8(0x40 | rexB); // prefix is only emitted when it carries a bit
+ _emitU8(0xd3);
+ _emitU8(modrm);
+ }
+ void SAR_q_CL(GPR64 dst) // SAR r/m64, CL (REX.W D3 /7), register-direct form
+ {
+ _emitU8((dst & 8) ? 0x49 : 0x48); // REX.W, plus REX.B when bit 3 of dst is set
+ _emitU8(0xd3);
+ _emitU8(0xC0 | (7 << 3) | (dst & 7)); // mod=11b (register direct), reg field = /7
+ }
+ void SAR_d_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // SAR dword [memReg (+ index) + offset], CL (D3 /7)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0xd3);
+ _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SAR_q_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // SAR qword [memReg (+ index) + offset], CL (REX.W D3 /7)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | 0x08); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0xd3);
+ _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void JMP_j32(s32 imm) // JMP rel32 (E9 cd)
+ {
+ _emitU8(0xe9); // opcode byte
+ _emitU32(static_cast<u32>(imm)); // 32-bit operand follows the opcode directly
+ }
+ void Jcc_j32(X86Cond cond, s32 imm) // Jcc rel32 (0F 80+cc cd)
+ {
+ _emitU8(0x0f); // two-byte opcode escape
+ _emitU8(static_cast<u8>(cond) | 0x80); // condition code is merged into the second opcode byte
+ _emitU32(static_cast<u32>(imm));
+ }
+ void SETcc_b(X86Cond cond, GPR8_REX dst) // SETcc r/m8 (0F 90+cc), register-direct form
+ {
+ const bool needsRex = (dst >= 4); // any 8-bit register numbered 4 or above gets a REX prefix
+ if (needsRex)
+ _emitU8(0x40 | ((dst & 8) >> 3)); // bit 3 of the register number travels in REX.B
+ _emitU8(0x0f); // two-byte opcode escape
+ const u8 ccOpcode = 0x90 | static_cast<u8>(cond); // condition code is merged into the second opcode byte
+ _emitU8(ccOpcode);
+ _emitU8(0xC0 | (dst & 7)); // mod=11b (register direct), reg field left at 0
+ }
+ void SETcc_b_l(X86Cond cond, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // SETcc byte [memReg (+ index) + offset] (0F 90+cc)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x0f);
+ _emitU8(0x90 | (u8)cond); // merge the condition code like SETcc_b does; a plain 0x90 ignores cond and always encodes SETO
+ _emitU8((mod << 6) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void CMPXCHG_dd(GPR32 dst, GPR32 src) // CMPXCHG r/m32, r32 (0F B1 /r), register-direct form
+ {
+ const uint8 rexBits = ((dst & 8) >> 3) | ((src & 8) >> 1); // dst -> ModRM.rm (REX.B), src -> ModRM.reg (REX.R)
+ if (rexBits != 0)
+ _emitU8(0x40 | rexBits); // prefix is only emitted when it carries a bit
+ _emitU8(0x0f); // two-byte opcode escape
+ _emitU8(0xb1);
+ const uint8 modrm = 0xC0 | ((src & 7) << 3) | (dst & 7); // mod=11b (register direct)
+ _emitU8(modrm);
+ }
+ void CMPXCHG_qq(GPR64 dst, GPR64 src) // CMPXCHG r/m64, r64 (REX.W 0F B1 /r), register-direct form
+ {
+ _emitU8(0x48 | ((src & 8) >> 1) | ((dst & 8) >> 3)); // REX.W; src -> REX.R, dst -> REX.B
+ _emitU8(0x0f); // two-byte opcode escape
+ _emitU8(0xb1);
+ _emitU8(0xC0 | (dst & 7) | ((src & 7) << 3)); // mod=11b (register direct), reg=src, rm=dst
+ }
+ void CMPXCHG_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) // CMPXCHG dword [memReg (+ index) + offset], src (0F B1 /r)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src & 8) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x0f);
+ _emitU8(0xb1);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void CMPXCHG_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) // CMPXCHG qword [memReg (+ index) + offset], src (REX.W 0F B1 /r)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | 0x08); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x0f);
+ _emitU8(0xb1);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void BSWAP_d(GPR32 dst) // BSWAP r32 (0F C8+rd)
+ {
+ const uint8 rexB = (dst & 8) >> 3; // bit 3 of the register number travels in REX.B
+ const uint8 opcode2 = 0xc8 | (dst & 7); // low three register bits are embedded in the second opcode byte
+ if (rexB != 0)
+ _emitU8(0x40 | rexB); // prefix is only emitted when it carries a bit
+ _emitU8(0x0f); // two-byte opcode escape
+ _emitU8(opcode2);
+ }
+ void BSWAP_q(GPR64 dst) // BSWAP r64 (REX.W 0F C8+rd)
+ {
+ _emitU8((dst & 8) ? 0x49 : 0x48); // REX.W, plus REX.B when bit 3 of dst is set
+ _emitU8(0x0f); // two-byte opcode escape
+ _emitU8(0xc8 | (dst & 7)); // low three register bits are embedded in the second opcode byte
+ }
+ void BT_du8(GPR32 dst, u8 imm) // BT r/m32, imm8 (0F BA /4 ib), register-direct form
+ {
+ const uint8 rexB = (dst & 8) >> 3; // bit 3 of the register number travels in REX.B
+ const uint8 modrm = 0xC0 | (4 << 3) | (dst & 7); // mod=11b (register direct), reg field = /4
+ if (rexB != 0)
+ _emitU8(0x40 | rexB); // prefix is only emitted when it carries a bit
+ _emitU8(0x0f); // two-byte opcode escape
+ _emitU8(0xba);
+ _emitU8(modrm);
+ _emitU8(imm);
+ }
+ void BT_qu8(GPR64 dst, u8 imm) // BT r/m64, imm8 (REX.W 0F BA /4 ib), register-direct form
+ {
+ _emitU8((dst & 8) ? 0x49 : 0x48); // REX.W, plus REX.B when bit 3 of dst is set
+ _emitU8(0x0f); // two-byte opcode escape
+ _emitU8(0xba);
+ _emitU8(0xC0 | (4 << 3) | (dst & 7)); // mod=11b (register direct), reg field = /4
+ _emitU8(imm);
+ }
+ void BT_du8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, u8 imm) // BT dword [memReg (+ index) + offset], imm8 (0F BA /4 ib)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 3)); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x0f);
+ _emitU8(0xba);
+ _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void BT_qu8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, u8 imm) // BT qword [memReg (+ index) + offset], imm8 (REX.W 0F BA /4 ib)
+ {
+ uint8 mod; // ModRM.mod: 0 = no displacement, 1 = disp8, 2 = disp32
+ if (offset == 0 && (memReg & 7) != 5) mod = 0; // rm/base 101b (RBP/R13) has no displacement-free encoding
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE); // NOTE(review): scaler only enables the index; the SIB scale bits below are always emitted as 0
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = (GPR64)4; // RSP/R12 base always needs a SIB byte; index 100b with REX.X clear means "no index" (copying R12 would also encode it as index)
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | 0x08); // no SIB: the base sits in ModRM.rm, which is extended by REX.B (>> 3), not REX.R
+ }
+ _emitU8(0x0f);
+ _emitU8(0xba);
+ _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+};
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h
new file mode 100644
index 00000000..bc0c27c5
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "IMLInstruction.h"
+#include "IMLSegment.h"
+
+// optimizer passes
+void IMLOptimizer_OptimizeDirectFloatCopies(struct ppcImlGenContext_t* ppcImlGenContext);
+void IMLOptimizer_OptimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcImlGenContext);
+void PPCRecompiler_optimizePSQLoadAndStore(struct ppcImlGenContext_t* ppcImlGenContext);
+
+void IMLOptimizer_StandardOptimizationPass(ppcImlGenContext_t& ppcImlGenContext);
+
+// debug output helpers
+void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& disassemblyLineOut); // writes the textual form of inst into disassemblyLineOut
+void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo = false); // logs one segment: header, instructions, links; optionally liveness info
+void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo = false); // logs every segment of the context via IMLDebug_DumpSegment
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp
new file mode 100644
index 00000000..6ae4b591
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp
@@ -0,0 +1,5 @@
+#include "IML.h"
+//#include "PPCRecompilerIml.h"
+#include "util/helpers/fixedSizeList.h"
+
+#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp
new file mode 100644
index 00000000..cd269869
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp
@@ -0,0 +1,561 @@
+#include "IML.h"
+#include "IMLInstruction.h"
+#include "IMLSegment.h"
+#include "IMLRegisterAllocatorRanges.h"
+#include "util/helpers/StringBuf.h"
+
+#include "../PPCRecompiler.h"
+
+const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml)
+{
+    // Known operations map to a fixed mnemonic. Any other operation is formatted into a static buffer which the next such call overwrites.
+    const uint32 opcode = iml->operation;
+    if (opcode == PPCREC_IML_OP_ASSIGN)
+        return "MOV";
+    if (opcode == PPCREC_IML_OP_ADD)
+        return "ADD";
+    if (opcode == PPCREC_IML_OP_ADD_WITH_CARRY)
+        return "ADC";
+    if (opcode == PPCREC_IML_OP_SUB)
+        return "SUB";
+    if (opcode == PPCREC_IML_OP_OR)
+        return "OR";
+    if (opcode == PPCREC_IML_OP_AND)
+        return "AND";
+    if (opcode == PPCREC_IML_OP_XOR)
+        return "XOR";
+    if (opcode == PPCREC_IML_OP_LEFT_SHIFT)
+        return "LSH";
+    if (opcode == PPCREC_IML_OP_RIGHT_SHIFT_U)
+        return "RSH";
+    if (opcode == PPCREC_IML_OP_RIGHT_SHIFT_S)
+        return "ARSH";
+    if (opcode == PPCREC_IML_OP_LEFT_ROTATE)
+        return "LROT";
+    if (opcode == PPCREC_IML_OP_MULTIPLY_SIGNED)
+        return "MULS";
+    if (opcode == PPCREC_IML_OP_DIVIDE_SIGNED)
+        return "DIVS";
+    if (opcode == PPCREC_IML_OP_FPR_ASSIGN)
+        return "FMOV";
+    if (opcode == PPCREC_IML_OP_FPR_ADD)
+        return "FADD";
+    if (opcode == PPCREC_IML_OP_FPR_SUB)
+        return "FSUB";
+    if (opcode == PPCREC_IML_OP_FPR_MULTIPLY)
+        return "FMUL";
+    if (opcode == PPCREC_IML_OP_FPR_DIVIDE)
+        return "FDIV";
+    if (opcode == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64)
+        return "F32TOF64";
+    if (opcode == PPCREC_IML_OP_FPR_ABS)
+        return "FABS";
+    if (opcode == PPCREC_IML_OP_FPR_NEGATE)
+        return "FNEG";
+    if (opcode == PPCREC_IML_OP_FPR_NEGATIVE_ABS)
+        return "FNABS";
+    if (opcode == PPCREC_IML_OP_FPR_FLOAT_TO_INT)
+        return "F2I";
+    if (opcode == PPCREC_IML_OP_FPR_INT_TO_FLOAT)
+        return "I2F";
+    if (opcode == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
+        return "BITMOVE";
+    static char _tempOpcodename[32];
+    sprintf(_tempOpcodename, "OP0%02x_T%d", iml->operation, iml->type);
+    return _tempOpcodename;
+}
+
+std::string IMLDebug_GetRegName(IMLReg r)
+{
+    // result is a prefix selected by the register format, followed by the decimal register id
+    const uint32 regId = r.GetRegID();
+    const char* formatPrefix = "";
+    switch (r.GetRegFormat())
+    {
+    case IMLRegFormat::F32:
+        formatPrefix = "f";
+        break;
+    case IMLRegFormat::F64:
+        formatPrefix = "fd";
+        break;
+    case IMLRegFormat::I32:
+        formatPrefix = "i";
+        break;
+    case IMLRegFormat::I64:
+        formatPrefix = "r";
+        break;
+    default:
+        DEBUG_BREAK; // format without a prefix mapping; the name then consists of the id only
+    }
+    return std::string(formatPrefix) + fmt::format("{}", regId);
+}
+
+void IMLDebug_AppendRegisterParam(StringBuf& strOutput, IMLReg virtualRegister, bool isLast = false)
+{
+    const std::string regName = IMLDebug_GetRegName(virtualRegister);
+    strOutput.add(regName);
+    if (!isLast) strOutput.add(", "); // operands are comma separated, the final one gets no separator
+}
+
+// Appends val as an optional "-" followed by "0x" and the 8 digit hex magnitude; adds ", " unless isLast is set
+void IMLDebug_AppendS32Param(StringBuf& strOutput, sint32 val, bool isLast = false)
+{
+    // the magnitude is computed in unsigned arithmetic: negating the most negative sint32 as a signed value overflows (undefined behavior)
+    const uint32 magnitude = (val < 0) ? (0u - (uint32)val) : (uint32)val;
+    if (val < 0)
+        strOutput.add("-");
+    strOutput.addFmt("0x{:08x}", magnitude);
+    if (!isLast)
+        strOutput.add(", ");
+}
+
+void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* imlSegment, sint32 offset)
+{
+    // Appends one column per liveness subrange of the segment, showing how the range
+    // relates to the position `offset`: it begins there, ends there, covers it,
+    // or is not live at it.
+    // The columns are aligned by first padding the line to 70 characters.
+    for (sint32 column = currentLineText.getLen(); column < 70; column++)
+        currentLineText.add(" ");
+
+    // walk the segment's linked list of subranges
+    for (raLivenessRange* range = imlSegment->raInfo.linkedList_allSubranges; range; range = range->link_allSegmentRanges.next)
+    {
+        auto& interval = range->interval;
+        if (interval.start.GetInstructionIndexEx() == offset)
+        {
+            // range begins here: marker followed by the virtual register
+            const bool beginsPastInputEdge = interval.start.IsInstructionIndex() && !interval.start.IsOnInputEdge();
+            if (beginsPastInputEdge)
+                currentLineText.add(".");
+            else
+                currentLineText.add("|");
+            currentLineText.addFmt("{:<4}", range->GetVirtualRegister());
+            continue;
+        }
+        if (interval.end.GetInstructionIndexEx() == offset)
+        {
+            // range ends here
+            const bool endsBeforeOutputEdge = interval.end.IsInstructionIndex() && !interval.end.IsOnOutputEdge();
+            if (endsBeforeOutputEdge)
+                currentLineText.add("* ");
+            else
+                currentLineText.add("| ");
+            continue;
+        }
+        // neither begin nor end: the range covers this position or is not live at it
+        if (interval.ContainsInstructionIndexEx(offset))
+            currentLineText.add("| ");
+        else
+            currentLineText.add(" ");
+    }
+}
+
+std::string IMLDebug_GetSegmentName(ppcImlGenContext_t* ctx, IMLSegment* seg)
+{
+    // The name is "Seg" plus the segment's position in ctx->segmentList2 as 4 hex digits.
+    // Without a context, or for a segment that is not in the list, the name is empty.
+    std::string segmentName;
+    if (ctx != nullptr)
+    {
+        const size_t segmentCount = ctx->segmentList2.size();
+        size_t position = 0;
+        while (position < segmentCount && ctx->segmentList2[position] != seg)
+            position++;
+        if (position < segmentCount)
+            segmentName = fmt::format("Seg{:04x}", position);
+    }
+    return segmentName;
+}
+
+std::string IMLDebug_GetConditionName(IMLCondition cond)
+{
+    // Maps a condition to the short mnemonic used in the disassembly output.
+    // Equality first, then the unsigned and the signed ordering conditions.
+    if (cond == IMLCondition::EQ)
+        return "EQ";
+    if (cond == IMLCondition::NEQ)
+        return "NEQ";
+    if (cond == IMLCondition::UNSIGNED_GT)
+        return "UGT";
+    if (cond == IMLCondition::UNSIGNED_LT)
+        return "ULT";
+    if (cond == IMLCondition::SIGNED_GT)
+        return "SGT";
+    if (cond == IMLCondition::SIGNED_LT)
+        return "SLT";
+    // no mnemonic for this condition: raise the "unimplemented" assert and
+    // fall back to a placeholder name
+    cemu_assert_unimplemented();
+    return "ukn";
+}
+
+void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& disassemblyLineOut)
+{
+    const sint32 lineOffsetParameters = 10;//18;
+    // Formats inst as one line of text (mnemonic padded to lineOffsetParameters, then the operands) and assigns it to disassemblyLineOut.
+    StringBuf strOutput(1024);
+    strOutput.reset();
+    if (inst.type == PPCREC_IML_TYPE_R_NAME || inst.type == PPCREC_IML_TYPE_NAME_R)
+    {
+        if (inst.type == PPCREC_IML_TYPE_R_NAME)
+            strOutput.add("R_NAME");
+        else
+            strOutput.add("NAME_R");
+        while ((sint32)strOutput.getLen() < lineOffsetParameters)
+            strOutput.add(" ");
+
+        if(inst.type == PPCREC_IML_TYPE_R_NAME)
+            IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR);
+
+        strOutput.add("name_");
+        if (inst.op_r_name.name >= PPCREC_NAME_R0 && inst.op_r_name.name < (PPCREC_NAME_R0 + 999))
+        {
+            strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0);
+        }
+        else if (inst.op_r_name.name >= PPCREC_NAME_FPR_HALF && inst.op_r_name.name < (PPCREC_NAME_FPR_HALF + 32*2)) // same chain as the r-names, so exactly one name is printed
+        {
+            strOutput.addFmt("f{}", (inst.op_r_name.name - PPCREC_NAME_FPR_HALF) / 2); // two consecutive names per FPR: even offset is ps0, odd offset is ps1
+            if ((inst.op_r_name.name-PPCREC_NAME_FPR_HALF)&1)
+                strOutput.add(".ps1");
+            else
+                strOutput.add(".ps0");
+        }
+        else if (inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0 + 999))
+        {
+            strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0);
+        }
+        else if (inst.op_r_name.name >= PPCREC_NAME_CR && inst.op_r_name.name <= PPCREC_NAME_CR_LAST)
+            strOutput.addFmt("cr{}", inst.op_r_name.name - PPCREC_NAME_CR);
+        else if (inst.op_r_name.name == PPCREC_NAME_XER_CA)
+            strOutput.add("xer.ca");
+        else if (inst.op_r_name.name == PPCREC_NAME_XER_SO)
+            strOutput.add("xer.so");
+        else if (inst.op_r_name.name == PPCREC_NAME_XER_OV)
+            strOutput.add("xer.ov");
+        else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_EA)
+            strOutput.add("cpuReservation.ea");
+        else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_VAL)
+            strOutput.add("cpuReservation.value");
+        else
+        {
+            strOutput.addFmt("name_ukn{}", inst.op_r_name.name);
+        }
+        if (inst.type != PPCREC_IML_TYPE_R_NAME)
+        {
+            strOutput.add(", "); // NAME_R lists the register after the name, R_NAME before it
+            IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR, true);
+        }
+
+    }
+    else if (inst.type == PPCREC_IML_TYPE_R_R)
+    {
+        strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
+        while ((sint32)strOutput.getLen() < lineOffsetParameters)
+            strOutput.add(" ");
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regR);
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regA, true);
+    }
+    else if (inst.type == PPCREC_IML_TYPE_R_R_R)
+    {
+        strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
+        while ((sint32)strOutput.getLen() < lineOffsetParameters)
+            strOutput.add(" ");
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regR);
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regA);
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regB, true);
+    }
+    else if (inst.type == PPCREC_IML_TYPE_R_R_R_CARRY)
+    {
+        strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
+        while ((sint32)strOutput.getLen() < lineOffsetParameters)
+            strOutput.add(" ");
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regR);
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regA);
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regB);
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regCarry, true);
+    }
+    else if (inst.type == PPCREC_IML_TYPE_COMPARE)
+    {
+        strOutput.add("CMP ");
+        while ((sint32)strOutput.getLen() < lineOffsetParameters)
+            strOutput.add(" ");
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regA);
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regB);
+        strOutput.addFmt("{}", IMLDebug_GetConditionName(inst.op_compare.cond));
+        strOutput.add(" -> ");
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regR, true);
+    }
+    else if (inst.type == PPCREC_IML_TYPE_COMPARE_S32)
+    {
+        strOutput.add("CMP ");
+        while ((sint32)strOutput.getLen() < lineOffsetParameters)
+            strOutput.add(" ");
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regA);
+        strOutput.addFmt("{}", inst.op_compare_s32.immS32);
+        strOutput.addFmt(", {}", IMLDebug_GetConditionName(inst.op_compare_s32.cond));
+        strOutput.add(" -> ");
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regR, true);
+    }
+    else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
+    {
+        strOutput.add("CJUMP ");
+        while ((sint32)strOutput.getLen() < lineOffsetParameters)
+            strOutput.add(" ");
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_conditional_jump.registerBool, true);
+        if (!inst.op_conditional_jump.mustBeTrue)
+            strOutput.add("(inverted)");
+    }
+    else if (inst.type == PPCREC_IML_TYPE_JUMP)
+    {
+        strOutput.add("JUMP");
+    }
+    else if (inst.type == PPCREC_IML_TYPE_R_R_S32)
+    {
+        strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
+        while ((sint32)strOutput.getLen() < lineOffsetParameters)
+            strOutput.add(" ");
+
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regR);
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regA);
+        IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32.immS32, true);
+    }
+    else if (inst.type == PPCREC_IML_TYPE_R_R_S32_CARRY)
+    {
+        strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
+        while ((sint32)strOutput.getLen() < lineOffsetParameters)
+            strOutput.add(" ");
+
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regR);
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regA);
+        IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32_carry.immS32);
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regCarry, true);
+    }
+    else if (inst.type == PPCREC_IML_TYPE_R_S32)
+    {
+        strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
+        while ((sint32)strOutput.getLen() < lineOffsetParameters)
+            strOutput.add(" ");
+
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_r_immS32.regR);
+        IMLDebug_AppendS32Param(strOutput, inst.op_r_immS32.immS32, true);
+    }
+    else if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE ||
+             inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED)
+    {
+        if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED)
+            strOutput.add("LD_");
+        else
+            strOutput.add("ST_");
+
+        if (inst.op_storeLoad.flags2.signExtend)
+            strOutput.add("S");
+        else
+            strOutput.add("U");
+        strOutput.addFmt("{}", inst.op_storeLoad.copyWidth);
+
+        while ((sint32)strOutput.getLen() < lineOffsetParameters)
+            strOutput.add(" ");
+
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData);
+
+        if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED)
+            strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), IMLDebug_GetRegName(inst.op_storeLoad.registerMem2));
+        else
+            strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32);
+    }
+    else if (inst.type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
+    {
+        strOutput.add("ATOMIC_ST_U32");
+
+        while ((sint32)strOutput.getLen() < lineOffsetParameters)
+            strOutput.add(" ");
+
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regEA);
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regCompareValue);
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regWriteValue);
+        IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regBoolOut, true);
+    }
+    else if (inst.type == PPCREC_IML_TYPE_NO_OP)
+    {
+        strOutput.add("NOP");
+    }
+    else if (inst.type == PPCREC_IML_TYPE_MACRO)
+    {
+        if (inst.operation == PPCREC_IML_MACRO_B_TO_REG)
+        {
+            strOutput.addFmt("MACRO B_TO_REG {}", IMLDebug_GetRegName(inst.op_macro.paramReg));
+        }
+        else if (inst.operation == PPCREC_IML_MACRO_BL)
+        {
+            strOutput.addFmt("MACRO BL 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16);
+        }
+        else if (inst.operation == PPCREC_IML_MACRO_B_FAR)
+        {
+            strOutput.addFmt("MACRO B_FAR 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16);
+        }
+        else if (inst.operation == PPCREC_IML_MACRO_LEAVE)
+        {
+            strOutput.addFmt("MACRO LEAVE ppc: 0x{:08x}", inst.op_macro.param);
+        }
+        else if (inst.operation == PPCREC_IML_MACRO_HLE)
+        {
+            strOutput.addFmt("MACRO HLE ppcAddr: 0x{:08x} funcId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2);
+        }
+        else if (inst.operation == PPCREC_IML_MACRO_COUNT_CYCLES)
+        {
+            strOutput.addFmt("MACRO COUNT_CYCLES cycles: {}", inst.op_macro.param);
+        }
+        else
+        {
+            strOutput.addFmt("MACRO ukn operation {}", inst.operation);
+        }
+    }
+    else if (inst.type == PPCREC_IML_TYPE_FPR_LOAD)
+    {
+        strOutput.addFmt("{} = ", IMLDebug_GetRegName(inst.op_storeLoad.registerData));
+        if (inst.op_storeLoad.flags2.signExtend)
+            strOutput.add("S");
+        else
+            strOutput.add("U");
+        strOutput.addFmt("{} [{}+{}] mode {}", inst.op_storeLoad.copyWidth / 8, IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32, inst.op_storeLoad.mode);
+        if (inst.op_storeLoad.flags2.notExpanded)
+        {
+            strOutput.addFmt(" ");
+        }
+    }
+    else if (inst.type == PPCREC_IML_TYPE_FPR_STORE)
+    {
+        if (inst.op_storeLoad.flags2.signExtend)
+            strOutput.add("S");
+        else
+            strOutput.add("U");
+        strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.immS32);
+        strOutput.addFmt(" = {} mode {}", IMLDebug_GetRegName(inst.op_storeLoad.registerData), inst.op_storeLoad.mode);
+    }
+    else if (inst.type == PPCREC_IML_TYPE_FPR_R)
+    {
+        strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
+        strOutput.addFmt("{}", IMLDebug_GetRegName(inst.op_fpr_r.regR));
+    }
+    else if (inst.type == PPCREC_IML_TYPE_FPR_R_R)
+    {
+        strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
+        strOutput.addFmt("{}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r.regA));
+    }
+    else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R)
+    {
+        strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
+        strOutput.addFmt("{}, {}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regB), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regC));
+    }
+    else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R)
+    {
+        strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
+        strOutput.addFmt("{}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regB));
+    }
+    else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
+    {
+        strOutput.addFmt("CYCLE_CHECK");
+    }
+    else if (inst.type == PPCREC_IML_TYPE_X86_EFLAGS_JCC)
+    {
+        strOutput.addFmt("X86_JCC {}", IMLDebug_GetConditionName(inst.op_x86_eflags_jcc.cond));
+    }
+    else
+    {
+        strOutput.addFmt("Unknown iml type {}", inst.type); // instruction types without a dedicated formatter end up here
+    }
+    disassemblyLineOut.assign(strOutput.c_str());
+}
+
+void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo)
+{
+    // Logs a segment in this order: header line, optional liveness row for the position
+    // RA_INTER_RANGE_START, one line per non-NOP instruction, optional register rows
+    // covering all ranges, and finally the links to neighbouring segments.
+    // Every line is assembled in lineBuf and written with LogType::Force.
+    StringBuf lineBuf(4096);
+
+    // header line: name, PPC address and loop depth, plus optional attributes
+    lineBuf.addFmt("SEGMENT {} | PPC=0x{:08x} Loop-depth {}", IMLDebug_GetSegmentName(ctx, imlSegment), imlSegment->ppcAddress, imlSegment->loopDepth);
+    if (imlSegment->isEnterable)
+        lineBuf.addFmt(" ENTERABLE (0x{:08x})", imlSegment->enterPPCAddress);
+    if (imlSegment->deadCodeEliminationHintSeg)
+        lineBuf.addFmt(" InheritOverwrite: {}", IMLDebug_GetSegmentName(ctx, imlSegment->deadCodeEliminationHintSeg));
+    cemuLog_log(LogType::Force, "{}", lineBuf.c_str());
+
+    if (printLivenessRangeInfo)
+    {
+        // liveness row for the position RA_INTER_RANGE_START
+        lineBuf.reset();
+        IMLDebug_PrintLivenessRangeInfo(lineBuf, imlSegment, RA_INTER_RANGE_START);
+        cemuLog_log(LogType::Force, "{}", lineBuf.c_str());
+    }
+    lineBuf.reset();
+
+    // instruction listing, each line prefixed with the instruction index in hex
+    for (sint32 instIndex = 0; instIndex < imlSegment->imlList.size(); instIndex++)
+    {
+        const IMLInstruction& inst = imlSegment->imlList[instIndex];
+        if (inst.type == PPCREC_IML_TYPE_NO_OP)
+            continue; // NOPs are left out of the listing
+        std::string instText;
+        IMLDebug_DisassembleInstruction(inst, instText);
+        lineBuf.reset();
+        lineBuf.addFmt("{:02x} ", instIndex);
+        lineBuf.add(instText);
+        if (printLivenessRangeInfo)
+        {
+            IMLDebug_PrintLivenessRangeInfo(lineBuf, imlSegment, instIndex);
+        }
+        cemuLog_log(LogType::Force, "{}", lineBuf.c_str());
+    }
+
+    if (printLivenessRangeInfo)
+    {
+        // row of virtual registers, one entry per range
+        lineBuf.reset();
+        lineBuf.add("Ranges-VirtReg ");
+        for (raLivenessRange* range = imlSegment->raInfo.linkedList_allSubranges; range; range = range->link_allSegmentRanges.next)
+        {
+            lineBuf.addFmt("v{:<4}", (uint32)range->GetVirtualRegister());
+        }
+        cemuLog_log(LogType::Force, "{}", lineBuf.c_str());
+        // row of physical registers, in the same range order
+        lineBuf.reset();
+        lineBuf.add("Ranges-PhysReg ");
+        for (raLivenessRange* range = imlSegment->raInfo.linkedList_allSubranges; range; range = range->link_allSegmentRanges.next)
+        {
+            lineBuf.addFmt("p{:<4}", range->GetPhysicalRegister());
+        }
+        cemuLog_log(LogType::Force, "{}", lineBuf.c_str());
+    }
+
+    // predecessor segments as a comma separated list
+    lineBuf.reset();
+    lineBuf.add("Links from: ");
+    for (sint32 prevIndex = 0; prevIndex < imlSegment->list_prevSegments.size(); prevIndex++)
+    {
+        if (prevIndex != 0)
+            lineBuf.add(", ");
+        lineBuf.addFmt("{}", IMLDebug_GetSegmentName(ctx, imlSegment->list_prevSegments[prevIndex]).c_str());
+    }
+    cemuLog_log(LogType::Force, "{}", lineBuf.c_str());
+
+    // successor segments, each on its own line
+    if (imlSegment->nextSegmentBranchNotTaken)
+        cemuLog_log(LogType::Force, "BranchNotTaken: {}", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchNotTaken).c_str());
+    if (imlSegment->nextSegmentBranchTaken)
+        cemuLog_log(LogType::Force, "BranchTaken: {}", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchTaken).c_str());
+    if (imlSegment->nextSegmentIsUncertain)
+        cemuLog_log(LogType::Force, "Dynamic target");
+}
+
+void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo)
+{
+    for (IMLSegment* segment : ppcImlGenContext->segmentList2) // segments are dumped in list order
+    {
+        IMLDebug_DumpSegment(ppcImlGenContext, segment, printLivenessRangeInfo);
+        cemuLog_log(LogType::Force, ""); // empty log line between segments
+    }
+}
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp
new file mode 100644
index 00000000..997de4e9
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp
@@ -0,0 +1,536 @@
+#include "IMLInstruction.h"
+#include "IML.h"
+
+#include "../PPCRecompiler.h"
+#include "../PPCRecompilerIml.h"
+
+// return true if an instruction has side effects on top of just reading and writing registers
+bool IMLInstruction::HasSideEffects() const
+{
+    // instruction types treated as doing nothing beyond reading and writing their register operands
+    const bool isRegisterOnly =
+        type == PPCREC_IML_TYPE_R_R || type == PPCREC_IML_TYPE_R_R_S32 ||
+        type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32;
+    return !isRegisterOnly; // todo - add more cases
+}
+
+void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const // Reports which registers this instruction reads and writes, selected by type and operation
+{
+ registersUsed->readGPR1 = IMLREG_INVALID; // all slots start out invalid; the matching branch below fills in only the slots it uses
+ registersUsed->readGPR2 = IMLREG_INVALID;
+ registersUsed->readGPR3 = IMLREG_INVALID;
+ registersUsed->readGPR4 = IMLREG_INVALID;
+ registersUsed->writtenGPR1 = IMLREG_INVALID;
+ registersUsed->writtenGPR2 = IMLREG_INVALID;
+ if (type == PPCREC_IML_TYPE_R_NAME)
+ {
+ registersUsed->writtenGPR1 = op_r_name.regR;
+ }
+ else if (type == PPCREC_IML_TYPE_NAME_R)
+ {
+ registersUsed->readGPR1 = op_r_name.regR;
+ }
+ else if (type == PPCREC_IML_TYPE_R_R)
+ {
+ if (operation == PPCREC_IML_OP_X86_CMP)
+ {
+ // both operands are read only
+ registersUsed->readGPR1 = op_r_r.regR;
+ registersUsed->readGPR2 = op_r_r.regA;
+ }
+ else if (
+ operation == PPCREC_IML_OP_ASSIGN ||
+ operation == PPCREC_IML_OP_ENDIAN_SWAP ||
+ operation == PPCREC_IML_OP_CNTLZW ||
+ operation == PPCREC_IML_OP_NOT ||
+ operation == PPCREC_IML_OP_NEG ||
+ operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32 ||
+ operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32)
+ {
+ // result is written, operand is read
+ registersUsed->writtenGPR1 = op_r_r.regR;
+ registersUsed->readGPR1 = op_r_r.regA;
+ }
+ else
+ cemu_assert_unimplemented(); // R_R operations must be listed explicitly above
+ }
+ else if (type == PPCREC_IML_TYPE_R_S32)
+ {
+ cemu_assert_debug(operation != PPCREC_IML_OP_ADD &&
+ operation != PPCREC_IML_OP_SUB &&
+ operation != PPCREC_IML_OP_AND &&
+ operation != PPCREC_IML_OP_OR &&
+ operation != PPCREC_IML_OP_XOR); // deprecated, use r_r_s32 for these
+
+ if (operation == PPCREC_IML_OP_LEFT_ROTATE)
+ {
+ // register operand is read and write
+ registersUsed->readGPR1 = op_r_immS32.regR;
+ registersUsed->writtenGPR1 = op_r_immS32.regR;
+ }
+ else if (operation == PPCREC_IML_OP_X86_CMP)
+ {
+ // register operand is read only
+ registersUsed->readGPR1 = op_r_immS32.regR;
+ }
+ else
+ {
+ // register operand is write only
+ // todo - use explicit lists, avoid default cases
+ registersUsed->writtenGPR1 = op_r_immS32.regR;
+ }
+ }
+ else if (type == PPCREC_IML_TYPE_R_R_S32)
+ {
+ registersUsed->writtenGPR1 = op_r_r_s32.regR; // same usage for every R_R_S32 operation: regR written, regA read
+ registersUsed->readGPR1 = op_r_r_s32.regA;
+ }
+ else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY)
+ {
+ registersUsed->writtenGPR1 = op_r_r_s32_carry.regR;
+ registersUsed->readGPR1 = op_r_r_s32_carry.regA;
+ // some operations read carry
+ switch (operation)
+ {
+ case PPCREC_IML_OP_ADD_WITH_CARRY:
+ registersUsed->readGPR2 = op_r_r_s32_carry.regCarry;
+ break;
+ case PPCREC_IML_OP_ADD:
+ break;
+ default:
+ cemu_assert_unimplemented();
+ }
+ // carry is always written
+ registersUsed->writtenGPR2 = op_r_r_s32_carry.regCarry;
+ }
+ else if (type == PPCREC_IML_TYPE_R_R_R)
+ {
+ // in all cases result is written and other operands are read only
+ // with the exception of XOR, where if regA == regB then all bits are zeroed out. So we don't consider it a read
+ registersUsed->writtenGPR1 = op_r_r_r.regR;
+ if(!(operation == PPCREC_IML_OP_XOR && op_r_r_r.regA == op_r_r_r.regB))
+ {
+ registersUsed->readGPR1 = op_r_r_r.regA;
+ registersUsed->readGPR2 = op_r_r_r.regB;
+ }
+ }
+ else if (type == PPCREC_IML_TYPE_R_R_R_CARRY)
+ {
+ registersUsed->writtenGPR1 = op_r_r_r_carry.regR;
+ registersUsed->readGPR1 = op_r_r_r_carry.regA;
+ registersUsed->readGPR2 = op_r_r_r_carry.regB;
+ // some operations read carry
+ switch (operation)
+ {
+ case PPCREC_IML_OP_ADD_WITH_CARRY:
+ registersUsed->readGPR3 = op_r_r_r_carry.regCarry;
+ break;
+ case PPCREC_IML_OP_ADD:
+ break;
+ default:
+ cemu_assert_unimplemented();
+ }
+ // carry is always written
+ registersUsed->writtenGPR2 = op_r_r_r_carry.regCarry;
+ }
+ else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
+ {
+ // no effect on registers
+ }
+ else if (type == PPCREC_IML_TYPE_NO_OP)
+ {
+ // no effect on registers
+ }
+ else if (type == PPCREC_IML_TYPE_MACRO)
+ {
+ if (operation == PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_COUNT_CYCLES || operation == PPCREC_IML_MACRO_HLE)
+ {
+ // no effect on registers
+ }
+ else if (operation == PPCREC_IML_MACRO_B_TO_REG)
+ {
+ cemu_assert_debug(op_macro.paramReg.IsValid());
+ registersUsed->readGPR1 = op_macro.paramReg;
+ }
+ else
+ cemu_assert_unimplemented();
+ }
+ else if (type == PPCREC_IML_TYPE_COMPARE)
+ {
+ registersUsed->readGPR1 = op_compare.regA;
+ registersUsed->readGPR2 = op_compare.regB;
+ registersUsed->writtenGPR1 = op_compare.regR;
+ }
+ else if (type == PPCREC_IML_TYPE_COMPARE_S32)
+ {
+ registersUsed->readGPR1 = op_compare_s32.regA;
+ registersUsed->writtenGPR1 = op_compare_s32.regR;
+ }
+ else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
+ {
+ registersUsed->readGPR1 = op_conditional_jump.registerBool;
+ }
+ else if (type == PPCREC_IML_TYPE_JUMP)
+ {
+ // no registers affected
+ }
+ else if (type == PPCREC_IML_TYPE_LOAD)
+ {
+ registersUsed->writtenGPR1 = op_storeLoad.registerData;
+ if (op_storeLoad.registerMem.IsValid()) // address registers are only reported when valid
+ registersUsed->readGPR1 = op_storeLoad.registerMem;
+ }
+ else if (type == PPCREC_IML_TYPE_LOAD_INDEXED)
+ {
+ registersUsed->writtenGPR1 = op_storeLoad.registerData;
+ if (op_storeLoad.registerMem.IsValid())
+ registersUsed->readGPR1 = op_storeLoad.registerMem;
+ if (op_storeLoad.registerMem2.IsValid())
+ registersUsed->readGPR2 = op_storeLoad.registerMem2;
+ }
+ else if (type == PPCREC_IML_TYPE_STORE)
+ {
+ registersUsed->readGPR1 = op_storeLoad.registerData; // for stores the data register is a read; address registers follow in the next read slots
+ if (op_storeLoad.registerMem.IsValid())
+ registersUsed->readGPR2 = op_storeLoad.registerMem;
+ }
+ else if (type == PPCREC_IML_TYPE_STORE_INDEXED)
+ {
+ registersUsed->readGPR1 = op_storeLoad.registerData;
+ if (op_storeLoad.registerMem.IsValid())
+ registersUsed->readGPR2 = op_storeLoad.registerMem;
+ if (op_storeLoad.registerMem2.IsValid())
+ registersUsed->readGPR3 = op_storeLoad.registerMem2;
+ }
+ else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
+ {
+ registersUsed->readGPR1 = op_atomic_compare_store.regEA;
+ registersUsed->readGPR2 = op_atomic_compare_store.regCompareValue;
+ registersUsed->readGPR3 = op_atomic_compare_store.regWriteValue;
+ registersUsed->writtenGPR1 = op_atomic_compare_store.regBoolOut;
+ }
+ else if (type == PPCREC_IML_TYPE_CALL_IMM)
+ {
+ if (op_call_imm.regParam0.IsValid())
+ registersUsed->readGPR1 = op_call_imm.regParam0;
+ if (op_call_imm.regParam1.IsValid())
+ registersUsed->readGPR2 = op_call_imm.regParam1;
+ if (op_call_imm.regParam2.IsValid())
+ registersUsed->readGPR3 = op_call_imm.regParam2;
+ registersUsed->writtenGPR1 = op_call_imm.regReturn; // assigned without a validity check, unlike the parameters
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_LOAD)
+ {
+ // fpr load operation
+ registersUsed->writtenGPR1 = op_storeLoad.registerData; // fpr operands are reported through the same read/written slots
+ // address is in gpr register
+ if (op_storeLoad.registerMem.IsValid())
+ registersUsed->readGPR1 = op_storeLoad.registerMem;
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
+ {
+ // fpr load operation
+ registersUsed->writtenGPR1 = op_storeLoad.registerData;
+ // address is in gpr registers
+ if (op_storeLoad.registerMem.IsValid())
+ registersUsed->readGPR1 = op_storeLoad.registerMem;
+ if (op_storeLoad.registerMem2.IsValid())
+ registersUsed->readGPR2 = op_storeLoad.registerMem2;
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_STORE)
+ {
+ // fpr store operation
+ registersUsed->readGPR1 = op_storeLoad.registerData;
+ if (op_storeLoad.registerMem.IsValid())
+ registersUsed->readGPR2 = op_storeLoad.registerMem;
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
+ {
+ // fpr store operation
+ registersUsed->readGPR1 = op_storeLoad.registerData;
+ // address is in gpr registers
+ if (op_storeLoad.registerMem.IsValid())
+ registersUsed->readGPR2 = op_storeLoad.registerMem;
+ if (op_storeLoad.registerMem2.IsValid())
+ registersUsed->readGPR3 = op_storeLoad.registerMem2;
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_R_R)
+ {
+ // fpr operation
+ if (
+ operation == PPCREC_IML_OP_FPR_ASSIGN ||
+ operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64 ||
+ operation == PPCREC_IML_OP_FPR_FCTIWZ
+ )
+ {
+ registersUsed->readGPR1 = op_fpr_r_r.regA;
+ registersUsed->writtenGPR1 = op_fpr_r_r.regR;
+ }
+ else if (operation == PPCREC_IML_OP_FPR_MULTIPLY ||
+ operation == PPCREC_IML_OP_FPR_DIVIDE ||
+ operation == PPCREC_IML_OP_FPR_ADD ||
+ operation == PPCREC_IML_OP_FPR_SUB)
+ {
+ registersUsed->readGPR1 = op_fpr_r_r.regA; // two-operand arithmetic: regR is reported as both read and written
+ registersUsed->readGPR2 = op_fpr_r_r.regR;
+ registersUsed->writtenGPR1 = op_fpr_r_r.regR;
+
+ }
+ else if (operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT ||
+ operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT ||
+ operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
+ {
+ registersUsed->writtenGPR1 = op_fpr_r_r.regR;
+ registersUsed->readGPR1 = op_fpr_r_r.regA;
+ }
+ else
+ cemu_assert_unimplemented();
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_R_R_R)
+ {
+ // fpr operation
+ registersUsed->readGPR1 = op_fpr_r_r_r.regA;
+ registersUsed->readGPR2 = op_fpr_r_r_r.regB;
+ registersUsed->writtenGPR1 = op_fpr_r_r_r.regR;
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R)
+ {
+ // fpr operation
+ registersUsed->readGPR1 = op_fpr_r_r_r_r.regA;
+ registersUsed->readGPR2 = op_fpr_r_r_r_r.regB;
+ registersUsed->readGPR3 = op_fpr_r_r_r_r.regC;
+ registersUsed->writtenGPR1 = op_fpr_r_r_r_r.regR;
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_R)
+ {
+ // fpr operation
+ if (operation == PPCREC_IML_OP_FPR_NEGATE ||
+ operation == PPCREC_IML_OP_FPR_ABS ||
+ operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS ||
+ operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64 ||
+ operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM)
+ {
+ registersUsed->readGPR1 = op_fpr_r.regR; // in-place operations: the single operand is read and written
+ registersUsed->writtenGPR1 = op_fpr_r.regR;
+ }
+ else if (operation == PPCREC_IML_OP_FPR_LOAD_ONE)
+ {
+ registersUsed->writtenGPR1 = op_fpr_r.regR;
+ }
+ else
+ cemu_assert_unimplemented();
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_COMPARE)
+ {
+ registersUsed->writtenGPR1 = op_fpr_compare.regR;
+ registersUsed->readGPR1 = op_fpr_compare.regA;
+ registersUsed->readGPR2 = op_fpr_compare.regB;
+ }
+ else if (type == PPCREC_IML_TYPE_X86_EFLAGS_JCC)
+ {
+ // no registers read or written (except for the implicit eflags)
+ }
+ else
+ {
+ cemu_assert_unimplemented(); // instruction types without an entry above are rejected
+ }
+}
+
+IMLReg replaceRegisterIdMultiple(IMLReg reg, const std::unordered_map& translationTable)
+{
+    // NOTE(review): std::unordered_map appears here without template arguments - confirm the key/value types against the declaration
+    if (reg.IsInvalid())
+        return reg; // invalid registers pass through untouched
+    const auto entry = translationTable.find(reg.GetRegID());
+    cemu_assert_debug(entry != translationTable.cend()); // every valid register id is expected to have a table entry
+    reg.SetRegID(entry->second);
+    return reg;
+}
+
+// Rewrites every register operand of this instruction through translationTable (maps old register ID to new register ID).
+// Invalid (unused) operands are left as they are; immediates, names and other non-register fields are not modified.
+void IMLInstruction::RewriteGPR(const std::unordered_map<IMLRegID, IMLRegID>& translationTable)
+{
+    const auto remap = [&translationTable](IMLReg& reg) { reg = replaceRegisterIdMultiple(reg, translationTable); }; // translates one operand in place
+
+    if (type == PPCREC_IML_TYPE_R_NAME)
+    {
+        remap(op_r_name.regR);
+    }
+    else if (type == PPCREC_IML_TYPE_NAME_R)
+    {
+        remap(op_r_name.regR); // NAME_R uses the op_r_name layout as well
+    }
+    else if (type == PPCREC_IML_TYPE_R_R)
+    {
+        remap(op_r_r.regR);
+        remap(op_r_r.regA);
+    }
+    else if (type == PPCREC_IML_TYPE_R_S32)
+    {
+        remap(op_r_immS32.regR);
+    }
+    else if (type == PPCREC_IML_TYPE_R_R_S32)
+    {
+        remap(op_r_r_s32.regR);
+        remap(op_r_r_s32.regA);
+    }
+    else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY)
+    {
+        remap(op_r_r_s32_carry.regR);
+        remap(op_r_r_s32_carry.regA);
+        remap(op_r_r_s32_carry.regCarry);
+    }
+    else if (type == PPCREC_IML_TYPE_R_R_R)
+    {
+        remap(op_r_r_r.regR);
+        remap(op_r_r_r.regA);
+        remap(op_r_r_r.regB);
+    }
+    else if (type == PPCREC_IML_TYPE_R_R_R_CARRY)
+    {
+        remap(op_r_r_r_carry.regR);
+        remap(op_r_r_r_carry.regA);
+        remap(op_r_r_r_carry.regB);
+        remap(op_r_r_r_carry.regCarry);
+    }
+    else if (type == PPCREC_IML_TYPE_COMPARE)
+    {
+        remap(op_compare.regR);
+        remap(op_compare.regA);
+        remap(op_compare.regB);
+    }
+    else if (type == PPCREC_IML_TYPE_COMPARE_S32)
+    {
+        remap(op_compare_s32.regR);
+        remap(op_compare_s32.regA);
+    }
+    else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
+    {
+        remap(op_conditional_jump.registerBool);
+    }
+    else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP || type == PPCREC_IML_TYPE_NO_OP)
+    {
+        // no effect on registers
+    }
+    else if (type == PPCREC_IML_TYPE_MACRO)
+    {
+        if (operation == PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_HLE || operation == PPCREC_IML_MACRO_COUNT_CYCLES)
+        {
+            // no effect on registers
+        }
+        else if (operation == PPCREC_IML_MACRO_B_TO_REG)
+        {
+            remap(op_macro.paramReg);
+        }
+        else
+        {
+            cemu_assert_unimplemented();
+        }
+    }
+    else if (type == PPCREC_IML_TYPE_LOAD)
+    {
+        remap(op_storeLoad.registerData);
+        if (op_storeLoad.registerMem.IsValid())
+        {
+            remap(op_storeLoad.registerMem);
+        }
+    }
+    else if (type == PPCREC_IML_TYPE_LOAD_INDEXED)
+    {
+        remap(op_storeLoad.registerData);
+        if (op_storeLoad.registerMem.IsValid())
+            remap(op_storeLoad.registerMem);
+        if (op_storeLoad.registerMem2.IsValid())
+            remap(op_storeLoad.registerMem2);
+    }
+    else if (type == PPCREC_IML_TYPE_STORE)
+    {
+        remap(op_storeLoad.registerData);
+        if (op_storeLoad.registerMem.IsValid())
+            remap(op_storeLoad.registerMem);
+    }
+    else if (type == PPCREC_IML_TYPE_STORE_INDEXED)
+    {
+        remap(op_storeLoad.registerData);
+        if (op_storeLoad.registerMem.IsValid())
+            remap(op_storeLoad.registerMem);
+        if (op_storeLoad.registerMem2.IsValid())
+            remap(op_storeLoad.registerMem2);
+    }
+    else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
+    {
+        remap(op_atomic_compare_store.regEA);
+        remap(op_atomic_compare_store.regCompareValue);
+        remap(op_atomic_compare_store.regWriteValue);
+        remap(op_atomic_compare_store.regBoolOut);
+    }
+    else if (type == PPCREC_IML_TYPE_CALL_IMM)
+    {
+        remap(op_call_imm.regReturn);
+        if (op_call_imm.regParam0.IsValid())
+            remap(op_call_imm.regParam0);
+        if (op_call_imm.regParam1.IsValid())
+            remap(op_call_imm.regParam1);
+        if (op_call_imm.regParam2.IsValid())
+            remap(op_call_imm.regParam2);
+    }
+    else if (type == PPCREC_IML_TYPE_FPR_LOAD)
+    {
+        remap(op_storeLoad.registerData);
+        remap(op_storeLoad.registerMem);
+    }
+    else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
+    {
+        remap(op_storeLoad.registerData);
+        remap(op_storeLoad.registerMem);
+        remap(op_storeLoad.registerMem2);
+    }
+    else if (type == PPCREC_IML_TYPE_FPR_STORE)
+    {
+        remap(op_storeLoad.registerData);
+        remap(op_storeLoad.registerMem);
+    }
+    else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
+    {
+        remap(op_storeLoad.registerData);
+        remap(op_storeLoad.registerMem);
+        remap(op_storeLoad.registerMem2);
+    }
+    else if (type == PPCREC_IML_TYPE_FPR_R)
+    {
+        remap(op_fpr_r.regR);
+    }
+    else if (type == PPCREC_IML_TYPE_FPR_R_R)
+    {
+        remap(op_fpr_r_r.regR);
+        remap(op_fpr_r_r.regA);
+    }
+    else if (type == PPCREC_IML_TYPE_FPR_R_R_R)
+    {
+        remap(op_fpr_r_r_r.regR);
+        remap(op_fpr_r_r_r.regA);
+        remap(op_fpr_r_r_r.regB);
+    }
+    else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R)
+    {
+        remap(op_fpr_r_r_r_r.regR);
+        remap(op_fpr_r_r_r_r.regA);
+        remap(op_fpr_r_r_r_r.regB);
+        remap(op_fpr_r_r_r_r.regC);
+    }
+    else if (type == PPCREC_IML_TYPE_FPR_COMPARE)
+    {
+        remap(op_fpr_compare.regA);
+        remap(op_fpr_compare.regB);
+        remap(op_fpr_compare.regR);
+    }
+    else if (type == PPCREC_IML_TYPE_X86_EFLAGS_JCC)
+    {
+        // no registers read or written (except for the implicit eflags)
+    }
+    else
+    {
+        cemu_assert_unimplemented();
+    }
+}
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h
new file mode 100644
index 00000000..4df2a666
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h
@@ -0,0 +1,826 @@
+#pragma once
+
+using IMLRegID = uint16; // 16 bit ID
+using IMLPhysReg = sint32; // arbitrary value that is up to the architecture backend, usually this will be the register index. A value of -1 is reserved and means not assigned
+
+// format of IMLReg:
+// 0-15 (16 bit) IMLRegID
+// 19-23 (5 bit) Offset In elements, for SIMD registers (the IMLReg constructors currently do not encode this field)
+// 24-27 (4 bit) IMLRegFormat RegFormat
+// 28-31 (4 bit) IMLRegFormat BaseFormat
+
+enum class IMLRegFormat : uint8 // format tag stored in the two 4 bit format fields of IMLReg (base format and register format)
+{
+ INVALID_FORMAT, // value 0; an IMLReg whose base format is INVALID_FORMAT is treated as an invalid register
+ I64,
+ I32,
+ I16,
+ I8,
+ // I1 ?
+ F64,
+ F32,
+ TYPE_COUNT, // last enumerator; equals the number of enumerators declared before it
+};
+
+// Register operand of an IML instruction, packed into a single 32 bit value:
+// bits 0-15 register ID, bits 24-27 register format, bits 28-31 base format.
+// A base format of INVALID_FORMAT (as in the all-zero raw value) marks the register as invalid.
+class IMLReg
+{
+public:
+    IMLReg() : m_raw(0) {} // 0 is invalid
+
+    IMLReg(IMLRegFormat baseRegFormat, IMLRegFormat regFormat, uint8 viewOffset, IMLRegID regId)
+    {
+        m_raw = 0; // note: viewOffset is accepted but not encoded into the raw value
+        m_raw |= ((uint32)baseRegFormat << 28); // shift as unsigned 32 bit so the top bits never pass through a signed int
+        m_raw |= ((uint32)regFormat << 24);
+        m_raw |= (uint32)regId;
+    }
+
+    IMLReg(IMLReg&& baseReg, IMLRegFormat viewFormat, uint8 viewOffset, IMLRegID regId)
+    {
+        DEBUG_BREAK; // register views are not implemented
+        m_raw = 0; // stay in the invalid state instead of leaving m_raw uninitialized
+        //m_raw |= ((uint8)baseRegFormat << 28);
+        //m_raw |= ((uint8)viewFormat << 24);
+        //m_raw |= (uint32)regId;
+    }
+
+    IMLReg(const IMLReg& other) : m_raw(other.m_raw) {}
+
+    IMLRegFormat GetBaseFormat() const
+    {
+        return (IMLRegFormat)((m_raw >> 28) & 0xF);
+    }
+
+    IMLRegFormat GetRegFormat() const
+    {
+        return (IMLRegFormat)((m_raw >> 24) & 0xF);
+    }
+
+    IMLRegID GetRegID() const
+    {
+        cemu_assert_debug(GetBaseFormat() != IMLRegFormat::INVALID_FORMAT); // the ID of an invalid register is meaningless
+        cemu_assert_debug(GetRegFormat() != IMLRegFormat::INVALID_FORMAT);
+        return (IMLRegID)(m_raw & 0xFFFF);
+    }
+
+    void SetRegID(IMLRegID regId)
+    {
+        // IMLRegID is 16 bit wide, so regId always fits into the ID field and needs no range check
+        m_raw &= ~(uint32)0xFFFF; // clear the old ID, keep the format bits
+        m_raw |= (uint32)regId;
+    }
+
+    bool IsInvalid() const
+    {
+        return GetBaseFormat() == IMLRegFormat::INVALID_FORMAT;
+    }
+
+    bool IsValid() const
+    {
+        return GetBaseFormat() != IMLRegFormat::INVALID_FORMAT;
+    }
+
+    bool IsValidAndSameRegID(IMLRegID regId) const
+    {
+        return IsValid() && GetRegID() == regId;
+    }
+
+    // compare all fields
+    bool operator==(const IMLReg& other) const
+    {
+        return m_raw == other.m_raw;
+    }
+
+private:
+    uint32 m_raw;
+};
+
+static const IMLReg IMLREG_INVALID(IMLRegFormat::INVALID_FORMAT, IMLRegFormat::INVALID_FORMAT, 0, 0); // raw value 0, same as a default-constructed IMLReg
+static const IMLRegID IMLRegID_INVALID(0xFFFF); // sentinel register ID
+
+using IMLName = uint32;
+
+enum // PPCREC_IML_OP_*: values stored in IMLInstruction::operation by the make_* setters
+{
+ PPCREC_IML_OP_ASSIGN, // '=' operator
+ PPCREC_IML_OP_ENDIAN_SWAP, // '=' operator with 32bit endian swap
+ PPCREC_IML_OP_MULTIPLY_SIGNED, // '*' operator (signed multiply)
+ PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, // unsigned 64bit multiply, store only high 32bit-word of result
+ PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, // signed 64bit multiply, store only high 32bit-word of result
+ PPCREC_IML_OP_DIVIDE_SIGNED, // '/' operator (signed divide)
+ PPCREC_IML_OP_DIVIDE_UNSIGNED, // '/' operator (unsigned divide)
+
+ // binary operation
+ PPCREC_IML_OP_OR, // '|' operator
+ PPCREC_IML_OP_AND, // '&' operator
+ PPCREC_IML_OP_XOR, // '^' operator
+ PPCREC_IML_OP_LEFT_ROTATE, // left rotate operator
+ PPCREC_IML_OP_LEFT_SHIFT, // shift left operator
+ PPCREC_IML_OP_RIGHT_SHIFT_U, // right shift operator (unsigned)
+ PPCREC_IML_OP_RIGHT_SHIFT_S, // right shift operator (signed)
+ // ppc
+ PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits)
+ PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits)
+ PPCREC_IML_OP_CNTLZW, // presumably the PPC cntlzw (count leading zeros) operation - confirm
+ // FPU
+ PPCREC_IML_OP_FPR_ASSIGN,
+ PPCREC_IML_OP_FPR_LOAD_ONE, // load constant 1.0 into register
+ PPCREC_IML_OP_FPR_ADD,
+ PPCREC_IML_OP_FPR_SUB,
+ PPCREC_IML_OP_FPR_MULTIPLY,
+ PPCREC_IML_OP_FPR_DIVIDE,
+ PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, // expand f32 to f64 in-place
+ PPCREC_IML_OP_FPR_NEGATE,
+ PPCREC_IML_OP_FPR_ABS, // abs(fpr)
+ PPCREC_IML_OP_FPR_NEGATIVE_ABS, // -abs(fpr)
+ PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, // round 64bit double to 64bit double with 32bit float precision (in bottom half of xmm register)
+ PPCREC_IML_OP_FPR_FCTIWZ, // presumably the PPC fctiwz (float to integer word, round toward zero) operation - confirm
+ PPCREC_IML_OP_FPR_SELECT, // selectively copy bottom value from operand B or C based on value in operand A
+ // Conversion (FPR_R_R)
+ PPCREC_IML_OP_FPR_INT_TO_FLOAT, // convert integer value in gpr to floating point value in fpr
+ PPCREC_IML_OP_FPR_FLOAT_TO_INT, // convert floating point value in fpr to integer value in gpr
+
+ // Bitcast (FPR_R_R)
+ PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT,
+
+ // R_R_R + R_R_S32
+ PPCREC_IML_OP_ADD, // also R_R_R_CARRY
+ PPCREC_IML_OP_SUB,
+
+ // R_R only
+ PPCREC_IML_OP_NOT,
+ PPCREC_IML_OP_NEG,
+ PPCREC_IML_OP_ASSIGN_S16_TO_S32,
+ PPCREC_IML_OP_ASSIGN_S8_TO_S32,
+
+ // R_R_R_carry
+ PPCREC_IML_OP_ADD_WITH_CARRY, // similar to ADD but also adds carry bit (0 or 1)
+
+ // X86 extension
+ PPCREC_IML_OP_X86_CMP, // R_R and R_S32
+
+ PPCREC_IML_OP_INVALID // placeholder stored by setters of instruction types that carry no operation (e.g. make_compare, make_jump)
+};
+
+#define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN)
+
+enum // PPCREC_IML_MACRO_*: stored in IMLInstruction::operation of PPCREC_IML_TYPE_MACRO instructions (see make_macro)
+{
+ PPCREC_IML_MACRO_B_TO_REG, // branch to PPC address in register (used for BCCTR, BCLR)
+
+ PPCREC_IML_MACRO_BL, // call to different function (can be within same function)
+ PPCREC_IML_MACRO_B_FAR, // branch to different function
+ PPCREC_IML_MACRO_COUNT_CYCLES, // decrease current remaining thread cycles by a certain amount
+ PPCREC_IML_MACRO_HLE, // HLE function call
+ PPCREC_IML_MACRO_LEAVE, // leaves recompiler and switches to interpreter
+ // debugging
+ PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak
+};
+
+enum class IMLCondition : uint8 // comparison condition stored in the cond field of the compare and x86 eflags-jcc instructions
+{
+ EQ, // integer conditions
+ NEQ,
+ SIGNED_GT,
+ SIGNED_LT,
+ UNSIGNED_GT,
+ UNSIGNED_LT,
+
+ // floating point conditions
+ UNORDERED_GT, // a > b, false if either is NaN
+ UNORDERED_LT, // a < b, false if either is NaN
+ UNORDERED_EQ, // a == b, false if either is NaN
+ UNORDERED_U, // unordered (true if either operand is NaN)
+
+ ORDERED_GT, // NOTE(review): NaN behavior of the ORDERED_* conditions is not documented here - confirm against the backends
+ ORDERED_LT,
+ ORDERED_EQ,
+ ORDERED_U
+};
+
+enum // PPCREC_IML_TYPE_*: values of IMLInstruction::type, selecting which op_* union member is in use
+{
+ PPCREC_IML_TYPE_NONE,
+ PPCREC_IML_TYPE_NO_OP, // no-op instruction
+ PPCREC_IML_TYPE_R_R, // r* = (op) *r (can also be r* (op) *r)
+ PPCREC_IML_TYPE_R_R_R, // r* = r* (op) r*
+ PPCREC_IML_TYPE_R_R_R_CARRY, // r* = r* (op) r* (reads and/or updates carry)
+ PPCREC_IML_TYPE_R_R_S32, // r* = r* (op) s32*
+ PPCREC_IML_TYPE_R_R_S32_CARRY, // r* = r* (op) s32* (reads and/or updates carry)
+ PPCREC_IML_TYPE_LOAD, // r* = [r*+s32*]
+ PPCREC_IML_TYPE_LOAD_INDEXED, // r* = [r*+r*]
+ PPCREC_IML_TYPE_STORE, // [r*+s32*] = r*
+ PPCREC_IML_TYPE_STORE_INDEXED, // [r*+r*] = r*
+ PPCREC_IML_TYPE_R_NAME, // r* = name
+ PPCREC_IML_TYPE_NAME_R, // name* = r*
+ PPCREC_IML_TYPE_R_S32, // r* (op) imm
+ PPCREC_IML_TYPE_MACRO, // operation holds a PPCREC_IML_MACRO_* id, parameters are in op_macro
+ PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles < 0
+
+ // conditions and branches
+ PPCREC_IML_TYPE_COMPARE, // r* = r* CMP[cond] r*
+ PPCREC_IML_TYPE_COMPARE_S32, // r* = r* CMP[cond] imm
+ PPCREC_IML_TYPE_JUMP, // jump always
+ PPCREC_IML_TYPE_CONDITIONAL_JUMP, // jump conditionally based on boolean value in register
+
+ // atomic
+ PPCREC_IML_TYPE_ATOMIC_CMP_STORE, // operands: regEA, regCompareValue, regWriteValue, regBoolOut (op_atomic_compare_store)
+
+ // function call
+ PPCREC_IML_TYPE_CALL_IMM, // call to fixed immediate address
+
+ // FPR
+ PPCREC_IML_TYPE_FPR_LOAD, // r* = (bitdepth) [r*+s32*] (single or paired single mode)
+ PPCREC_IML_TYPE_FPR_LOAD_INDEXED, // r* = (bitdepth) [r*+r*] (single or paired single mode)
+ PPCREC_IML_TYPE_FPR_STORE, // (bitdepth) [r*+s32*] = r* (single or paired single mode)
+ PPCREC_IML_TYPE_FPR_STORE_INDEXED, // (bitdepth) [r*+r*] = r* (single or paired single mode)
+ PPCREC_IML_TYPE_FPR_R_R, // operands: regR, regA (op_fpr_r_r)
+ PPCREC_IML_TYPE_FPR_R_R_R, // operands: regR, regA, regB (op_fpr_r_r_r)
+ PPCREC_IML_TYPE_FPR_R_R_R_R, // operands: regR, regA, regB, regC (op_fpr_r_r_r_r)
+ PPCREC_IML_TYPE_FPR_R, // single operand regR (op_fpr_r)
+
+ PPCREC_IML_TYPE_FPR_COMPARE, // r* = r* CMP[cond] r*
+
+ // X86 specific
+ PPCREC_IML_TYPE_X86_EFLAGS_JCC, // carries only a condition and an inverted flag (op_x86_eflags_jcc), no register operands
+};
+
+enum // IMLName: base values of the name ranges used by the R_NAME / NAME_R instructions
+{
+ PPCREC_NAME_NONE,
+ PPCREC_NAME_TEMPORARY = 1000,
+ PPCREC_NAME_R0 = 2000, // NOTE(review): presumably the base of the PPC GPR names (R0 + n) - confirm
+ PPCREC_NAME_SPR0 = 3000, // NOTE(review): presumably the base of the SPR names - confirm
+ PPCREC_NAME_FPR_HALF = 4800, // Counts PS0 and PS1 separately. E.g. fp3.ps1 is at offset 3 * 2 + 1
+ PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7
+ PPCREC_NAME_XER_CA = 6000, // carry bit from XER
+ PPCREC_NAME_XER_OV = 6001, // overflow bit from XER
+ PPCREC_NAME_XER_SO = 6002, // summary overflow bit from XER
+ PPCREC_NAME_CR = 7000, // CR register bits (31 to 0)
+ PPCREC_NAME_CR_LAST = PPCREC_NAME_CR+31, // last name of the CR bit range (7031)
+ PPCREC_NAME_CPU_MEMRES_EA = 8000,
+ PPCREC_NAME_CPU_MEMRES_VAL = 8001
+};
+
+#define PPC_REC_INVALID_REGISTER 0xFF // deprecated. Use IMLREG_INVALID instead
+
+enum // transfer modes stored in op_storeLoad.mode by the make_fpr_* load/store setters
+{
+ // fpr load
+ PPCREC_FPR_LD_MODE_SINGLE,
+ PPCREC_FPR_LD_MODE_DOUBLE,
+
+ // fpr store
+ PPCREC_FPR_ST_MODE_SINGLE,
+ PPCREC_FPR_ST_MODE_DOUBLE,
+
+ PPCREC_FPR_ST_MODE_UI32_FROM_PS0, // store raw low-32bit of PS0
+};
+
+// Register operands accessed by a single IML instruction: up to four read and up to two written registers.
+// Slots holding an invalid register are skipped by all helpers below.
+struct IMLUsedRegisters
+{
+    IMLUsedRegisters() {}
+
+    // true if one of the written registers is valid and carries the given register ID
+    bool IsWrittenByRegId(IMLRegID regId) const
+    {
+        return writtenGPR1.IsValidAndSameRegID(regId) ||
+            writtenGPR2.IsValidAndSameRegID(regId);
+    }
+
+    bool IsBaseGPRWritten(IMLReg imlReg) const
+    {
+        cemu_assert_debug(imlReg.IsValid()); // callers are expected to pass a valid register
+        auto regId = imlReg.GetRegID();
+        return IsWrittenByRegId(regId);
+    }
+
+    template<typename Fn>
+    void ForEachWrittenGPR(Fn F) const // calls F(reg) for every valid written register
+    {
+        if (writtenGPR1.IsValid())
+            F(writtenGPR1);
+        if (writtenGPR2.IsValid())
+            F(writtenGPR2);
+    }
+
+    template<typename Fn>
+    void ForEachReadGPR(Fn F) const // calls F(reg) for every valid read register
+    {
+        if (readGPR1.IsValid())
+            F(readGPR1);
+        if (readGPR2.IsValid())
+            F(readGPR2);
+        if (readGPR3.IsValid())
+            F(readGPR3);
+        if (readGPR4.IsValid())
+            F(readGPR4);
+    }
+
+    template<typename Fn>
+    void ForEachAccessedGPR(Fn F) const // calls F(reg, isWritten): reads first (false), then writes (true)
+    {
+        // GPRs
+        if (readGPR1.IsValid())
+            F(readGPR1, false);
+        if (readGPR2.IsValid())
+            F(readGPR2, false);
+        if (readGPR3.IsValid())
+            F(readGPR3, false);
+        if (readGPR4.IsValid())
+            F(readGPR4, false);
+        if (writtenGPR1.IsValid())
+            F(writtenGPR1, true);
+        if (writtenGPR2.IsValid())
+            F(writtenGPR2, true);
+    }
+
+    IMLReg readGPR1;
+    IMLReg readGPR2;
+    IMLReg readGPR3;
+    IMLReg readGPR4;
+    IMLReg writtenGPR1;
+    IMLReg writtenGPR2;
+};
+
+// One IML instruction. `type` (PPCREC_IML_TYPE_*) selects which union member is in use,
+// `operation` holds a PPCREC_IML_OP_* or PPCREC_IML_MACRO_* value for the types that need one.
+struct IMLInstruction
+{
+    IMLInstruction() {} // leaves all fields uninitialized; the make_* setters fill them in
+    IMLInstruction(const IMLInstruction& other)
+    {
+        memcpy(this, &other, sizeof(IMLInstruction)); // raw byte copy of the whole instruction, including the union
+    }
+
+    uint8 type;
+    uint8 operation;
+    union
+    {
+        struct
+        {
+            uint8 _padding[7];
+        }padding;
+        struct
+        {
+            IMLReg regR;
+            IMLReg regA;
+        }op_r_r;
+        struct
+        {
+            IMLReg regR;
+            IMLReg regA;
+            IMLReg regB;
+        }op_r_r_r;
+        struct
+        {
+            IMLReg regR;
+            IMLReg regA;
+            IMLReg regB;
+            IMLReg regCarry;
+        }op_r_r_r_carry;
+        struct
+        {
+            IMLReg regR;
+            IMLReg regA;
+            sint32 immS32;
+        }op_r_r_s32;
+        struct
+        {
+            IMLReg regR;
+            IMLReg regA;
+            IMLReg regCarry;
+            sint32 immS32;
+        }op_r_r_s32_carry;
+        struct
+        {
+            IMLReg regR;
+            IMLName name;
+        }op_r_name; // alias op_name_r
+        struct
+        {
+            IMLReg regR;
+            sint32 immS32;
+        }op_r_immS32;
+        struct
+        {
+            uint32 param;
+            uint32 param2;
+            uint16 paramU16;
+            IMLReg paramReg;
+        }op_macro;
+        struct
+        {
+            IMLReg registerData;
+            IMLReg registerMem;
+            IMLReg registerMem2;
+            uint8 copyWidth;
+            struct
+            {
+                bool swapEndian : 1;
+                bool signExtend : 1;
+                bool notExpanded : 1; // for floats
+            }flags2;
+            uint8 mode; // transfer mode
+            sint32 immS32;
+        }op_storeLoad;
+        struct
+        {
+            uintptr_t callAddress;
+            IMLReg regParam0;
+            IMLReg regParam1;
+            IMLReg regParam2;
+            IMLReg regReturn;
+        }op_call_imm;
+        struct
+        {
+            IMLReg regR;
+            IMLReg regA;
+        }op_fpr_r_r;
+        struct
+        {
+            IMLReg regR;
+            IMLReg regA;
+            IMLReg regB;
+        }op_fpr_r_r_r;
+        struct
+        {
+            IMLReg regR;
+            IMLReg regA;
+            IMLReg regB;
+            IMLReg regC;
+        }op_fpr_r_r_r_r;
+        struct
+        {
+            IMLReg regR;
+        }op_fpr_r;
+        struct
+        {
+            IMLReg regR; // stores the boolean result of the comparison
+            IMLReg regA;
+            IMLReg regB;
+            IMLCondition cond;
+        }op_fpr_compare;
+        struct
+        {
+            IMLReg regR; // stores the boolean result of the comparison
+            IMLReg regA;
+            IMLReg regB;
+            IMLCondition cond;
+        }op_compare;
+        struct
+        {
+            IMLReg regR; // stores the boolean result of the comparison
+            IMLReg regA;
+            sint32 immS32;
+            IMLCondition cond;
+        }op_compare_s32;
+        struct
+        {
+            IMLReg registerBool;
+            bool mustBeTrue;
+        }op_conditional_jump;
+        struct
+        {
+            IMLReg regEA;
+            IMLReg regCompareValue;
+            IMLReg regWriteValue;
+            IMLReg regBoolOut;
+        }op_atomic_compare_store;
+        // conditional operations (emitted if supported by target platform)
+        struct
+        {
+            // r_s32
+            IMLReg regR;
+            sint32 immS32;
+            // condition
+            uint8 crRegisterIndex;
+            uint8 crBitIndex;
+            bool bitMustBeSet;
+        }op_conditional_r_s32;
+        // X86 specific
+        struct
+        {
+            IMLCondition cond;
+            bool invertedCondition;
+        }op_x86_eflags_jcc;
+    };
+
+    // true for the macros BL, B_FAR, B_TO_REG, LEAVE and HLE and for all jump instruction types
+    bool IsSuffixInstruction() const
+    {
+        if (type == PPCREC_IML_TYPE_MACRO)
+        {
+            return operation == PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR ||
+                operation == PPCREC_IML_MACRO_B_TO_REG || operation == PPCREC_IML_MACRO_LEAVE ||
+                operation == PPCREC_IML_MACRO_HLE;
+        }
+        return type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP ||
+            type == PPCREC_IML_TYPE_CONDITIONAL_JUMP || type == PPCREC_IML_TYPE_X86_EFLAGS_JCC;
+    }
+
+    // instruction setters
+    void make_no_op()
+    {
+        type = PPCREC_IML_TYPE_NO_OP;
+        operation = 0;
+    }
+
+    void make_r_name(IMLReg regR, IMLName name)
+    {
+        cemu_assert_debug(regR.GetBaseFormat() == regR.GetRegFormat()); // for name load/store instructions the register must match the base format
+        type = PPCREC_IML_TYPE_R_NAME;
+        operation = PPCREC_IML_OP_ASSIGN;
+        op_r_name.regR = regR;
+        op_r_name.name = name;
+    }
+
+    void make_name_r(IMLName name, IMLReg regR)
+    {
+        cemu_assert_debug(regR.GetBaseFormat() == regR.GetRegFormat()); // for name load/store instructions the register must match the base format
+        type = PPCREC_IML_TYPE_NAME_R;
+        operation = PPCREC_IML_OP_ASSIGN;
+        op_r_name.regR = regR;
+        op_r_name.name = name;
+    }
+
+    void make_debugbreak(uint32 currentPPCAddress = 0)
+    {
+        make_macro(PPCREC_IML_MACRO_DEBUGBREAK, 0, currentPPCAddress, 0, IMLREG_INVALID);
+    }
+
+    void make_macro(uint32 macroId, uint32 param, uint32 param2, uint16 paramU16, IMLReg regParam)
+    {
+        this->type = PPCREC_IML_TYPE_MACRO;
+        this->operation = macroId;
+        this->op_macro.param = param;
+        this->op_macro.param2 = param2;
+        this->op_macro.paramU16 = paramU16;
+        this->op_macro.paramReg = regParam;
+    }
+
+    void make_cjump_cycle_check()
+    {
+        this->type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK;
+        this->operation = 0;
+    }
+
+    void make_r_r(uint32 operation, IMLReg regR, IMLReg regA)
+    {
+        this->type = PPCREC_IML_TYPE_R_R;
+        this->operation = operation;
+        this->op_r_r.regR = regR;
+        this->op_r_r.regA = regA;
+    }
+
+    void make_r_s32(uint32 operation, IMLReg regR, sint32 immS32)
+    {
+        this->type = PPCREC_IML_TYPE_R_S32;
+        this->operation = operation;
+        this->op_r_immS32.regR = regR;
+        this->op_r_immS32.immS32 = immS32;
+    }
+
+    void make_r_r_r(uint32 operation, IMLReg regR, IMLReg regA, IMLReg regB)
+    {
+        this->type = PPCREC_IML_TYPE_R_R_R;
+        this->operation = operation;
+        this->op_r_r_r.regR = regR;
+        this->op_r_r_r.regA = regA;
+        this->op_r_r_r.regB = regB;
+    }
+
+    void make_r_r_r_carry(uint32 operation, IMLReg regR, IMLReg regA, IMLReg regB, IMLReg regCarry)
+    {
+        this->type = PPCREC_IML_TYPE_R_R_R_CARRY;
+        this->operation = operation;
+        this->op_r_r_r_carry.regR = regR;
+        this->op_r_r_r_carry.regA = regA;
+        this->op_r_r_r_carry.regB = regB;
+        this->op_r_r_r_carry.regCarry = regCarry;
+    }
+
+    void make_r_r_s32(uint32 operation, IMLReg regR, IMLReg regA, sint32 immS32)
+    {
+        this->type = PPCREC_IML_TYPE_R_R_S32;
+        this->operation = operation;
+        this->op_r_r_s32.regR = regR;
+        this->op_r_r_s32.regA = regA;
+        this->op_r_r_s32.immS32 = immS32;
+    }
+
+    void make_r_r_s32_carry(uint32 operation, IMLReg regR, IMLReg regA, sint32 immS32, IMLReg regCarry)
+    {
+        this->type = PPCREC_IML_TYPE_R_R_S32_CARRY;
+        this->operation = operation;
+        this->op_r_r_s32_carry.regR = regR;
+        this->op_r_r_s32_carry.regA = regA;
+        this->op_r_r_s32_carry.immS32 = immS32;
+        this->op_r_r_s32_carry.regCarry = regCarry;
+    }
+
+    void make_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond)
+    {
+        this->type = PPCREC_IML_TYPE_COMPARE;
+        this->operation = PPCREC_IML_OP_INVALID;
+        this->op_compare.regR = regR;
+        this->op_compare.regA = regA;
+        this->op_compare.regB = regB;
+        this->op_compare.cond = cond;
+    }
+
+    void make_compare_s32(IMLReg regA, sint32 immS32, IMLReg regR, IMLCondition cond)
+    {
+        this->type = PPCREC_IML_TYPE_COMPARE_S32;
+        this->operation = PPCREC_IML_OP_INVALID;
+        this->op_compare_s32.regR = regR;
+        this->op_compare_s32.regA = regA;
+        this->op_compare_s32.immS32 = immS32;
+        this->op_compare_s32.cond = cond;
+    }
+
+    void make_conditional_jump(IMLReg regBool, bool mustBeTrue)
+    {
+        this->type = PPCREC_IML_TYPE_CONDITIONAL_JUMP;
+        this->operation = PPCREC_IML_OP_INVALID;
+        this->op_conditional_jump.registerBool = regBool;
+        this->op_conditional_jump.mustBeTrue = mustBeTrue;
+    }
+
+    void make_jump()
+    {
+        this->type = PPCREC_IML_TYPE_JUMP;
+        this->operation = PPCREC_IML_OP_INVALID;
+    }
+
+    // load from memory
+    void make_r_memory(IMLReg regD, IMLReg regMem, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian)
+    {
+        this->type = PPCREC_IML_TYPE_LOAD;
+        this->operation = 0;
+        this->op_storeLoad.registerData = regD;
+        this->op_storeLoad.registerMem = regMem;
+        this->op_storeLoad.immS32 = immS32;
+        this->op_storeLoad.copyWidth = copyWidth;
+        this->op_storeLoad.flags2.swapEndian = switchEndian;
+        this->op_storeLoad.flags2.signExtend = signExtend;
+    }
+
+    // store to memory
+    void make_memory_r(IMLReg regS, IMLReg regMem, sint32 immS32, uint32 copyWidth, bool switchEndian)
+    {
+        this->type = PPCREC_IML_TYPE_STORE;
+        this->operation = 0;
+        this->op_storeLoad.registerData = regS;
+        this->op_storeLoad.registerMem = regMem;
+        this->op_storeLoad.immS32 = immS32;
+        this->op_storeLoad.copyWidth = copyWidth;
+        this->op_storeLoad.flags2.swapEndian = switchEndian;
+        this->op_storeLoad.flags2.signExtend = false;
+    }
+
+    void make_atomic_cmp_store(IMLReg regEA, IMLReg regCompareValue, IMLReg regWriteValue, IMLReg regSuccessOutput)
+    {
+        this->type = PPCREC_IML_TYPE_ATOMIC_CMP_STORE;
+        this->operation = 0;
+        this->op_atomic_compare_store.regEA = regEA;
+        this->op_atomic_compare_store.regCompareValue = regCompareValue;
+        this->op_atomic_compare_store.regWriteValue = regWriteValue;
+        this->op_atomic_compare_store.regBoolOut = regSuccessOutput;
+    }
+
+    void make_call_imm(uintptr_t callAddress, IMLReg param0, IMLReg param1, IMLReg param2, IMLReg regReturn)
+    {
+        this->type = PPCREC_IML_TYPE_CALL_IMM;
+        this->operation = 0;
+        this->op_call_imm.callAddress = callAddress;
+        this->op_call_imm.regParam0 = param0;
+        this->op_call_imm.regParam1 = param1;
+        this->op_call_imm.regParam2 = param2;
+        this->op_call_imm.regReturn = regReturn;
+    }
+
+    // FPR
+
+    // load from memory
+    void make_fpr_r_memory(IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian)
+    {
+        this->type = PPCREC_IML_TYPE_FPR_LOAD;
+        this->operation = 0;
+        this->op_storeLoad.registerData = registerDestination;
+        this->op_storeLoad.registerMem = registerMemory;
+        this->op_storeLoad.immS32 = immS32;
+        this->op_storeLoad.mode = mode;
+        this->op_storeLoad.flags2.swapEndian = switchEndian;
+    }
+
+    void make_fpr_r_memory_indexed(IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 mode, bool switchEndian)
+    {
+        this->type = PPCREC_IML_TYPE_FPR_LOAD_INDEXED;
+        this->operation = 0;
+        this->op_storeLoad.registerData = registerDestination;
+        this->op_storeLoad.registerMem = registerMemory1;
+        this->op_storeLoad.registerMem2 = registerMemory2;
+        this->op_storeLoad.immS32 = 0;
+        this->op_storeLoad.mode = mode;
+        this->op_storeLoad.flags2.swapEndian = switchEndian;
+    }
+
+    // store to memory
+    void make_fpr_memory_r(IMLReg registerSource, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian)
+    {
+        this->type = PPCREC_IML_TYPE_FPR_STORE;
+        this->operation = 0;
+        this->op_storeLoad.registerData = registerSource;
+        this->op_storeLoad.registerMem = registerMemory;
+        this->op_storeLoad.immS32 = immS32;
+        this->op_storeLoad.mode = mode;
+        this->op_storeLoad.flags2.swapEndian = switchEndian;
+    }
+
+    void make_fpr_memory_r_indexed(IMLReg registerSource, IMLReg registerMemory1, IMLReg registerMemory2, sint32 immS32, uint32 mode, bool switchEndian)
+    {
+        this->type = PPCREC_IML_TYPE_FPR_STORE_INDEXED;
+        this->operation = 0;
+        this->op_storeLoad.registerData = registerSource;
+        this->op_storeLoad.registerMem = registerMemory1;
+        this->op_storeLoad.registerMem2 = registerMemory2;
+        this->op_storeLoad.immS32 = immS32;
+        this->op_storeLoad.mode = mode;
+        this->op_storeLoad.flags2.swapEndian = switchEndian;
+    }
+
+    void make_fpr_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond)
+    {
+        this->type = PPCREC_IML_TYPE_FPR_COMPARE;
+        this->operation = PPCREC_IML_OP_INVALID; // this type carries no operation; use the same placeholder as make_compare
+        this->op_fpr_compare.regR = regR;
+        this->op_fpr_compare.regA = regA;
+        this->op_fpr_compare.regB = regB;
+        this->op_fpr_compare.cond = cond;
+    }
+
+    void make_fpr_r(sint32 operation, IMLReg registerResult)
+    {
+        // OP (fpr)
+        this->type = PPCREC_IML_TYPE_FPR_R;
+        this->operation = operation;
+        this->op_fpr_r.regR = registerResult;
+    }
+
+    void make_fpr_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperand, sint32 crRegister=PPC_REC_INVALID_REGISTER)
+    {
+        // fpr OP fpr
+        this->type = PPCREC_IML_TYPE_FPR_R_R;
+        this->operation = operation;
+        this->op_fpr_r_r.regR = registerResult;
+        this->op_fpr_r_r.regA = registerOperand;
+    }
+
+    void make_fpr_r_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperand1, IMLReg registerOperand2, sint32 crRegister=PPC_REC_INVALID_REGISTER)
+    {
+        // fpr = OP (fpr,fpr)
+        this->type = PPCREC_IML_TYPE_FPR_R_R_R;
+        this->operation = operation;
+        this->op_fpr_r_r_r.regR = registerResult;
+        this->op_fpr_r_r_r.regA = registerOperand1;
+        this->op_fpr_r_r_r.regB = registerOperand2;
+    }
+
+    void make_fpr_r_r_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperandA, IMLReg registerOperandB, IMLReg registerOperandC, sint32 crRegister=PPC_REC_INVALID_REGISTER)
+    {
+        // fpr = OP (fpr,fpr,fpr)
+        this->type = PPCREC_IML_TYPE_FPR_R_R_R_R;
+        this->operation = operation;
+        this->op_fpr_r_r_r_r.regR = registerResult;
+        this->op_fpr_r_r_r_r.regA = registerOperandA;
+        this->op_fpr_r_r_r_r.regB = registerOperandB;
+        this->op_fpr_r_r_r_r.regC = registerOperandC;
+    }
+
+    /* X86 specific */
+    void make_x86_eflags_jcc(IMLCondition cond, bool invertedCondition)
+    {
+        this->type = PPCREC_IML_TYPE_X86_EFLAGS_JCC;
+        this->operation = PPCREC_IML_OP_INVALID; // this type carries no operation; use the same placeholder as make_jump
+        this->op_x86_eflags_jcc.cond = cond;
+        this->op_x86_eflags_jcc.invertedCondition = invertedCondition;
+    }
+
+    void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const;
+    bool HasSideEffects() const; // returns true if the instruction has side effects beyond just reading and writing registers. Dead code elimination uses this to know if an instruction can be dropped when the regular register outputs are not used
+
+    void RewriteGPR(const std::unordered_map<IMLRegID, IMLRegID>& translationTable);
+};
+
+// architecture specific constants
+namespace IMLArchX86
+{
+ static constexpr int PHYSREG_GPR_BASE = 0; // NOTE(review): presumably the first IMLPhysReg index of the GPR range - confirm against the x86 backend
+ static constexpr int PHYSREG_FPR_BASE = 16; // NOTE(review): presumably the first IMLPhysReg index of the FPR range - confirm against the x86 backend
+};
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp
new file mode 100644
index 00000000..7671a163
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp
@@ -0,0 +1,719 @@
+#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
+#include "Cafe/HW/Espresso/Recompiler/IML/IML.h"
+#include "Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h"
+
+#include "../PPCRecompiler.h"
+#include "../PPCRecompilerIml.h"
+#include "../BackendX64/BackendX64.h"
+
+#include "Common/FileStream.h"
+
+#include
+#include
+
+IMLReg _FPRRegFromID(IMLRegID regId) // wraps a raw register id into an IMLReg
+{
+ return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, regId); // F64 is passed for both format arguments, 0 for the third argument
+}
+
+// Helper of IMLOptimizer_OptimizeDirectFloatCopies. Starting right after the single-precision load at imlIndexLoad it scans
+// at most 24 following instructions of imlSegment for single-precision stores of the same FPR (fprReg).
+// If at least one is found, the load and every matching store are flagged notExpanded, and PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64
+// instructions are inserted for the tracked register copies and after the last matching store. ppcImlGenContext is unused.
+void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg fprReg)
+{
+	IMLRegID fprIndex = fprReg.GetRegID();
+
+	IMLInstruction* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad;
+	if (imlInstructionLoad->op_storeLoad.flags2.notExpanded)
+		return;
+	boost::container::static_vector<sint32, 4> trackedMoves; // only track up to 4 copies (stores instruction indices)
+	IMLUsedRegisters registersUsed;
+	sint32 scanRangeEnd = std::min<sint32>(imlIndexLoad + 25, imlSegment->imlList.size()); // don't scan too far (saves performance and also the chances we can merge the load+store become low at high distances)
+	bool foundMatch = false;
+	sint32 lastStore = -1;
+	for (sint32 i = imlIndexLoad + 1; i < scanRangeEnd; i++)
+	{
+		IMLInstruction* imlInstruction = imlSegment->imlList.data() + i;
+		if (imlInstruction->IsSuffixInstruction())
+			break;
+		// check if FPR is stored
+		if ((imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE) ||
+			(imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE))
+		{
+			if (imlInstruction->op_storeLoad.registerData.GetRegID() == fprIndex)
+			{
+				if (foundMatch == false)
+				{
+					// flag the load-single instruction as "don't expand" (leave single value as-is)
+					imlInstructionLoad->op_storeLoad.flags2.notExpanded = true;
+				}
+				// also set the flag for the store instruction
+				IMLInstruction* imlInstructionStore = imlInstruction;
+				imlInstructionStore->op_storeLoad.flags2.notExpanded = true;
+
+				foundMatch = true;
+				lastStore = i + 1; // index right after the most recent matching store
+
+				continue;
+			}
+		}
+		// if the FPR is copied then keep track of it. We can expand the copies instead of the original
+		if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R && imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN && imlInstruction->op_fpr_r_r.regA.GetRegID() == fprIndex)
+		{
+			if (imlInstruction->op_fpr_r_r.regR.GetRegID() == fprIndex)
+			{
+				// unexpected no-op
+				break;
+			}
+			if (trackedMoves.size() >= trackedMoves.capacity())
+			{
+				// we cant track any more moves, expand here
+				lastStore = i;
+				break;
+			}
+			trackedMoves.push_back(i);
+			continue;
+		}
+		// check if FPR is overwritten
+		imlInstruction->CheckRegisterUsage(&registersUsed);
+		if (registersUsed.writtenGPR1.IsValidAndSameRegID(fprIndex) || registersUsed.writtenGPR2.IsValidAndSameRegID(fprIndex))
+			break;
+		// any other read of the FPR also ends the scan
+		if (registersUsed.readGPR1.IsValidAndSameRegID(fprIndex) || registersUsed.readGPR2.IsValidAndSameRegID(fprIndex) ||
+			registersUsed.readGPR3.IsValidAndSameRegID(fprIndex) || registersUsed.readGPR4.IsValidAndSameRegID(fprIndex))
+			break;
+	}
+
+	if (foundMatch)
+	{
+		// insert expand instructions for each target register of a move
+		sint32 positionBias = 0; // number of instructions inserted so far; shifts the recorded move indices
+		for (auto& trackedMove : trackedMoves)
+		{
+			sint32 realPosition = trackedMove + positionBias;
+			IMLInstruction* imlMoveInstruction = imlSegment->imlList.data() + realPosition;
+			if (realPosition >= lastStore)
+				break; // expand is inserted before this move
+			else
+				lastStore++;
+
+			cemu_assert_debug(imlMoveInstruction->type == PPCREC_IML_TYPE_FPR_R_R && imlMoveInstruction->op_fpr_r_r.regA.GetRegID() == fprIndex);
+			cemu_assert_debug(imlMoveInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::F64);
+			auto dstReg = imlMoveInstruction->op_fpr_r_r.regR; // copied before the insert below, which may invalidate imlMoveInstruction
+			IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, realPosition+1); // one after the move
+			newExpand->make_fpr_r(PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, dstReg);
+			positionBias++;
+		}
+		// insert expand instruction after store
+		IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, lastStore);
+		newExpand->make_fpr_r(PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, _FPRRegFromID(fprIndex));
+	}
+}
+
+/*
+* Scans for patterns:
+* (load single-precision float into FPR f)
+* (unrelated instructions)
+* (store single-precision float from FPR f)
+* For these patterns the store and load is modified to work with un-extended values (float remains as float, no double conversion)
+* The float->double extension is then executed later
+* Advantages:
+* Keeps denormals and other special float values intact
+* Slightly improves performance
+*/
+void IMLOptimizer_OptimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext)
+{
+	for (IMLSegment* segment : ppcImlGenContext->segmentList2)
+	{
+		// index based walk on purpose: the scan helper can insert instructions into imlList,
+		// so the size and the element reference are re-read on every iteration
+		for (sint32 instrIdx = 0; instrIdx < segment->imlList.size(); instrIdx++)
+		{
+			IMLInstruction& instr = segment->imlList[instrIdx];
+			const bool isFprLoad = instr.type == PPCREC_IML_TYPE_FPR_LOAD || instr.type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED;
+			if (!isFprLoad || instr.op_storeLoad.mode != PPCREC_FPR_LD_MODE_SINGLE)
+				continue;
+			// every single-precision FPR load (plain or indexed) is handed to the forward scan
+			// the data register is passed by value, so it is read before the helper modifies the list
+			PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segment, instrIdx, instr.op_storeLoad.registerData);
+		}
+	}
+}
+
+// Helper of IMLOptimizer_OptimizeDirectIntegerCopies. Starting right after the endian-swapping load at imlIndexLoad it scans
+// at most 24 following instructions for 32-bit stores of the same GPR. On a match the swapEndian flag of the load and of each matching
+// store is toggled and a PPCREC_IML_OP_ENDIAN_SWAP is inserted where the scan stopped, unless the GPR got overwritten first.
+void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg gprReg)
+{
+	cemu_assert_debug(gprReg.GetBaseFormat() == IMLRegFormat::I64); // todo - proper handling required for non-standard sizes
+	cemu_assert_debug(gprReg.GetRegFormat() == IMLRegFormat::I32);
+
+	IMLRegID gprIndex = gprReg.GetRegID();
+	IMLInstruction* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad;
+	if ( imlInstructionLoad->op_storeLoad.flags2.swapEndian == false )
+		return;
+	bool foundMatch = false;
+	IMLUsedRegisters registersUsed;
+	sint32 scanRangeEnd = std::min<sint32>(imlIndexLoad + 25, imlSegment->imlList.size()); // don't scan too far (saves performance and also the chances we can merge the load+store become low at high distances)
+	sint32 i = imlIndexLoad + 1; // declared outside the loop: the final value is the insert position of the endian swap
+	for (; i < scanRangeEnd; i++)
+	{
+		IMLInstruction* imlInstruction = imlSegment->imlList.data() + i;
+		if (imlInstruction->IsSuffixInstruction())
+			break;
+		// check if GPR is stored
+		if ((imlInstruction->type == PPCREC_IML_TYPE_STORE && imlInstruction->op_storeLoad.copyWidth == 32 ) )
+		{
+			if (imlInstruction->op_storeLoad.registerMem.GetRegID() == gprIndex)
+				break; // the GPR is used as the address register here, which needs the swapped value
+			if (imlInstruction->op_storeLoad.registerData.GetRegID() == gprIndex)
+			{
+				IMLInstruction* imlInstructionStore = imlInstruction;
+				if (foundMatch == false)
+				{
+					// switch the endian swap flag for the load instruction
+					imlInstructionLoad->op_storeLoad.flags2.swapEndian = !imlInstructionLoad->op_storeLoad.flags2.swapEndian;
+					foundMatch = true;
+				}
+				// switch the endian swap flag for the store instruction
+				imlInstructionStore->op_storeLoad.flags2.swapEndian = !imlInstructionStore->op_storeLoad.flags2.swapEndian;
+				// keep scanning
+				continue;
+			}
+		}
+		// check if GPR is accessed (all four read slots, same as the float variant of this scan)
+		imlInstruction->CheckRegisterUsage(&registersUsed);
+		if (registersUsed.readGPR1.IsValidAndSameRegID(gprIndex) || registersUsed.readGPR2.IsValidAndSameRegID(gprIndex) ||
+			registersUsed.readGPR3.IsValidAndSameRegID(gprIndex) || registersUsed.readGPR4.IsValidAndSameRegID(gprIndex))
+			break;
+		if (registersUsed.IsBaseGPRWritten(gprReg))
+			return; // GPR overwritten, we don't need to byte swap anymore
+	}
+	if (foundMatch)
+	{
+		PPCRecompiler_insertInstruction(imlSegment, i)->make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, gprReg, gprReg);
+	}
+}
+
+/*
+* Scans for patterns:
+* (32-bit load with endian swap into GPR r)
+* (unrelated instructions)
+* (32-bit store of GPR r)
+* For these patterns the store and load is modified to work with non-swapped values
+* The big_endian->little_endian conversion is then executed later
+* Advantages:
+* Slightly improves performance
+*/
+void IMLOptimizer_OptimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext)
+{
+	for (IMLSegment* segment : ppcImlGenContext->segmentList2)
+	{
+		// index based walk on purpose: the scan helper can insert an instruction into imlList
+		for (sint32 instrIdx = 0; instrIdx < segment->imlList.size(); instrIdx++)
+		{
+			IMLInstruction& instr = segment->imlList[instrIdx];
+			const bool isSwappedLoad32 = instr.type == PPCREC_IML_TYPE_LOAD && instr.op_storeLoad.copyWidth == 32 && instr.op_storeLoad.flags2.swapEndian;
+			if (isSwappedLoad32)
+				PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext, segment, instrIdx, instr.op_storeLoad.registerData);
+		}
+	}
+}
+
+IMLName PPCRecompilerImlGen_GetRegName(ppcImlGenContext_t* ppcImlGenContext, IMLReg reg);
+
+sint32 _getGQRIndexFromRegister(ppcImlGenContext_t* ppcImlGenContext, IMLReg gqrReg)
+{
+	// translates a register whose name lies in the SPR range UGQR0..UGQR7 into its offset from UGQR0; returns -1 otherwise
+	if (gqrReg.IsInvalid())
+		return -1;
+	const sint32 firstGqrName = PPCREC_NAME_SPR0 + SPR_UGQR0;
+	const sint32 lastGqrName = PPCREC_NAME_SPR0 + SPR_UGQR7;
+	const sint32 regName = PPCRecompilerImlGen_GetRegName(ppcImlGenContext, gqrReg);
+	if (regName < firstGqrName || regName > lastGqrName)
+	{
+		cemu_assert_suspicious(); // a valid register outside the UGQR name range is flagged
+		return -1;
+	}
+	return regName - firstGqrName;
+}
+
+bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, uint32& gqrValue)
+{
+	// the default configuration is:
+	// UGQR0 = 0x00000000
+	// UGQR2 = 0x00040004
+	// UGQR3 = 0x00050005
+	// UGQR4 = 0x00060006
+	// UGQR5 = 0x00070007
+	// but games are free to modify UGQR2 to UGQR7 it seems.
+	// no game modifies UGQR0 so it's safe enough to optimize for the default value
+	// Ideally we would do some kind of runtime tracking and second recompilation to create fast paths for PSQ_L/PSQ_ST but thats todo
+	// only index 0 is treated as known; for every other index gqrValue is left untouched
+	if (gqrIndex != 0)
+		return false;
+	gqrValue = 0x00000000;
+	return true;
+}
+
+// analyses register dependencies across the entire function
+// per segment this will generate information about which registers need to be preserved and which ones don't (e.g. are overwritten)
+class IMLOptimizerRegIOAnalysis
+{
+public:
+	// constructor with segment pointer list as span. maxRegId is the highest register id (inclusive) used for the "all registers" case
+	IMLOptimizerRegIOAnalysis(std::span<IMLSegment*> segmentList, uint32 maxRegId) : m_segmentList(segmentList), m_maxRegId(maxRegId)
+	{
+		m_segRegisterInOutList.resize(segmentList.size()); // one entry per segment, addressed by IMLSegment::momentaryIndex
+	}
+
+	struct IMLSegmentRegisterInOut
+	{
+		// todo - since our register ID range is usually pretty small (<64) we could use integer bitmasks to accelerate this? There is a helper class used in RA code already
+		std::unordered_set<IMLRegID> regWritten; // registers which are modified in this segment
+		std::unordered_set<IMLRegID> regImported; // registers which are read in this segment before they are written (importing value from previous segments)
+		std::unordered_set<IMLRegID> regForward; // registers which are not read or written in this segment, but are imported into a later segment (propagated info)
+	};
+
+	// calculate which registers are imported (read-before-written) and forwarded (read-before-written by a later segment) per segment
+	// then in a second step propagate the dependencies across linked segments
+	void ComputeDepedencies()
+	{
+		std::vector<IMLSegmentRegisterInOut>& segRegisterInOutList = m_segRegisterInOutList;
+		IMLSegmentRegisterInOut* segIO = segRegisterInOutList.data(); // advanced in lockstep with the segment loop below
+		uint32 index = 0;
+		for(auto& seg : m_segmentList)
+		{
+			seg->momentaryIndex = index; // (re)assigns the index which all later lookups into segRegisterInOutList rely on
+			index++;
+			for(auto& instr : seg->imlList)
+			{
+				IMLUsedRegisters registerUsage;
+				instr.CheckRegisterUsage(&registerUsage);
+				// registers are considered imported if they are read before being written in this seg
+				registerUsage.ForEachReadGPR([&](IMLReg gprReg) {
+					IMLRegID gprId = gprReg.GetRegID();
+					if (!segIO->regWritten.contains(gprId))
+					{
+						segIO->regImported.insert(gprId);
+					}
+				});
+				registerUsage.ForEachWrittenGPR([&](IMLReg gprReg) {
+					IMLRegID gprId = gprReg.GetRegID();
+					segIO->regWritten.insert(gprId);
+				});
+			}
+			segIO++;
+		}
+		// for every exit segment, import all registers
+		for(auto& seg : m_segmentList)
+		{
+			if (!seg->nextSegmentIsUncertain)
+				continue;
+			if(seg->deadCodeEliminationHintSeg)
+				continue; // a hint segment is present; its dependencies are propagated through list_deadCodeHintBy below
+			IMLSegmentRegisterInOut& segIO = segRegisterInOutList[seg->momentaryIndex];
+			for(uint32 i=0; i<=m_maxRegId; i++)
+			{
+				segIO.regImported.insert((IMLRegID)i);
+			}
+		}
+		// broadcast dependencies across segment chains
+		std::unordered_set<uint32> segIdsWhichNeedUpdate; // worklist of segment indices; starts with every segment
+		for (uint32 i = 0; i < m_segmentList.size(); i++)
+		{
+			segIdsWhichNeedUpdate.insert(i);
+		}
+		while(!segIdsWhichNeedUpdate.empty())
+		{
+			auto firstIt = segIdsWhichNeedUpdate.begin();
+			uint32 segId = *firstIt;
+			segIdsWhichNeedUpdate.erase(firstIt);
+			// forward regImported and regForward to earlier segments into their regForward, unless the register is written
+			auto& curSeg = m_segmentList[segId];
+			IMLSegmentRegisterInOut& curSegIO = segRegisterInOutList[segId];
+			for(auto& prevSeg : curSeg->list_prevSegments)
+			{
+				IMLSegmentRegisterInOut& prevSegIO = segRegisterInOutList[prevSeg->momentaryIndex];
+				bool prevSegChanged = false; // set when an insert below actually added a new register
+				for(auto& regId : curSegIO.regImported)
+				{
+					if (!prevSegIO.regWritten.contains(regId))
+						prevSegChanged |= prevSegIO.regForward.insert(regId).second;
+				}
+				for(auto& regId : curSegIO.regForward)
+				{
+					if (!prevSegIO.regWritten.contains(regId))
+						prevSegChanged |= prevSegIO.regForward.insert(regId).second;
+				}
+				if(prevSegChanged)
+					segIdsWhichNeedUpdate.insert(prevSeg->momentaryIndex); // revisit the predecessor so the change travels further up
+			}
+			// same for hint links
+			for(auto& prevSeg : curSeg->list_deadCodeHintBy)
+			{
+				IMLSegmentRegisterInOut& prevSegIO = segRegisterInOutList[prevSeg->momentaryIndex];
+				bool prevSegChanged = false;
+				for(auto& regId : curSegIO.regImported)
+				{
+					if (!prevSegIO.regWritten.contains(regId))
+						prevSegChanged |= prevSegIO.regForward.insert(regId).second;
+				}
+				for(auto& regId : curSegIO.regForward)
+				{
+					if (!prevSegIO.regWritten.contains(regId))
+						prevSegChanged |= prevSegIO.regForward.insert(regId).second;
+				}
+				if(prevSegChanged)
+					segIdsWhichNeedUpdate.insert(prevSeg->momentaryIndex);
+			}
+		}
+	}
+
+	std::unordered_set<IMLRegID> GetRegistersNeededAtEndOfSegment(IMLSegment& seg) // union of regImported and regForward of the successors; requires ComputeDepedencies() to have run
+	{
+		std::unordered_set<IMLRegID> regsNeeded;
+		if(seg.nextSegmentIsUncertain)
+		{
+			if(seg.deadCodeEliminationHintSeg)
+			{
+				auto& nextSegIO = m_segRegisterInOutList[seg.deadCodeEliminationHintSeg->momentaryIndex];
+				regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end());
+				regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end());
+			}
+			else
+			{
+				// add all regs
+				for(uint32 i = 0; i <= m_maxRegId; i++)
+					regsNeeded.insert(i);
+			}
+			return regsNeeded;
+		}
+		if(seg.nextSegmentBranchTaken)
+		{
+			auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchTaken->momentaryIndex];
+			regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end());
+			regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end());
+		}
+		if(seg.nextSegmentBranchNotTaken)
+		{
+			auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchNotTaken->momentaryIndex];
+			regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end());
+			regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end());
+		}
+		return regsNeeded;
+	}
+
+	bool IsRegisterNeededAtEndOfSegment(IMLSegment& seg, IMLRegID regId) // single-register variant of GetRegistersNeededAtEndOfSegment
+	{
+		if(seg.nextSegmentIsUncertain)
+		{
+			if(!seg.deadCodeEliminationHintSeg)
+				return true; // unknown successor without hint: every register counts as needed
+			auto& nextSegIO = m_segRegisterInOutList[seg.deadCodeEliminationHintSeg->momentaryIndex];
+			if(nextSegIO.regImported.contains(regId))
+				return true;
+			if(nextSegIO.regForward.contains(regId))
+				return true;
+			return false;
+		}
+		if(seg.nextSegmentBranchTaken)
+		{
+			auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchTaken->momentaryIndex];
+			if(nextSegIO.regImported.contains(regId))
+				return true;
+			if(nextSegIO.regForward.contains(regId))
+				return true;
+		}
+		if(seg.nextSegmentBranchNotTaken)
+		{
+			auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchNotTaken->momentaryIndex];
+			if(nextSegIO.regImported.contains(regId))
+				return true;
+			if(nextSegIO.regForward.contains(regId))
+				return true;
+		}
+		return false;
+	}
+
+private:
+	std::span<IMLSegment*> m_segmentList; // non-owning view of the segment list passed to the constructor
+	uint32 m_maxRegId;
+
+	std::vector<IMLSegmentRegisterInOut> m_segRegisterInOutList; // indexed by IMLSegment::momentaryIndex
+
+};
+
+// scan backwards starting from startIndex (inclusive) and return the index of the first found instruction which writes to the given register (by id), or -1 if there is none
+sint32 IMLUtil_FindInstructionWhichWritesRegister(IMLSegment& seg, sint32 startIndex, IMLReg reg, sint32 maxScanDistance = -1)
+{
+	sint32 endIndex = (maxScanDistance < 0) ? 0 : std::max<sint32>(startIndex - maxScanDistance, 0); // negative distance (the default) = no limit; previously -1 produced endIndex > startIndex and the loop never ran
+	for (sint32 i = startIndex; i >= endIndex; i--)
+	{
+		IMLInstruction& imlInstruction = seg.imlList[i];
+		IMLUsedRegisters registersUsed;
+		imlInstruction.CheckRegisterUsage(&registersUsed);
+		if (registersUsed.IsBaseGPRWritten(reg))
+			return i;
+	}
+	return -1;
+}
+
+// returns true if the instruction can safely be moved while keeping register data dependencies intact (only register accesses are examined here)
+// initialIndex is inclusive, targetIndex is exclusive
+bool IMLUtil_CanMoveInstructionTo(IMLSegment& seg, sint32 initialIndex, sint32 targetIndex)
+{
+	boost::container::static_vector<IMLRegID, 8> regsWritten; // NOTE(review): capacity 8 is assumed to exceed the register accesses of one instruction - confirm
+	boost::container::static_vector<IMLRegID, 8> regsRead;
+	// get list of read and written registers
+	IMLUsedRegisters registersUsed;
+	seg.imlList[initialIndex].CheckRegisterUsage(&registersUsed);
+	registersUsed.ForEachAccessedGPR([&](IMLReg reg, bool isWritten) {
+		if (isWritten)
+			regsWritten.push_back(reg.GetRegID());
+		else
+			regsRead.push_back(reg.GetRegID());
+	});
+	// check all the instructions inbetween
+	if(initialIndex < targetIndex)
+	{
+		sint32 scanStartIndex = initialIndex+1; // +1 to skip the moving instruction itself
+		sint32 scanEndIndex = targetIndex;
+		for (sint32 i = scanStartIndex; i < scanEndIndex; i++)
+		{
+			IMLUsedRegisters passedInstrRegs; // register usage of the instruction that would be jumped over
+			seg.imlList[i].CheckRegisterUsage(&passedInstrRegs);
+			// to move past another instruction: registers it reads must not be written by the moved instruction,
+			// and registers it writes must be neither read nor written by the moved instruction (the latter would change which write ends up last)
+			bool canMove = true;
+			passedInstrRegs.ForEachAccessedGPR([&](IMLReg reg, bool isWritten) {
+				IMLRegID regId = reg.GetRegID();
+				if (!isWritten)
+					canMove = canMove && std::find(regsWritten.begin(), regsWritten.end(), regId) == regsWritten.end();
+				else
+					canMove = canMove && std::find(regsRead.begin(), regsRead.end(), regId) == regsRead.end() && std::find(regsWritten.begin(), regsWritten.end(), regId) == regsWritten.end();
+			});
+			if(!canMove)
+				return false;
+		}
+	}
+	else
+	{
+		cemu_assert_unimplemented(); // backwards scan is todo
+		return false;
+	}
+	return true;
+}
+
+// counts how many read accesses to regId are reported by the instructions in [scanStartIndex, scanEndIndex] (both ends inclusive)
+sint32 IMLUtil_CountRegisterReadsInRange(IMLSegment& seg, sint32 scanStartIndex, sint32 scanEndIndex, IMLRegID regId)
+{
+	cemu_assert_debug(scanStartIndex <= scanEndIndex);
+	cemu_assert_debug(scanEndIndex < seg.imlList.size());
+	sint32 count = 0;
+	for (sint32 i = scanStartIndex; i <= scanEndIndex; i++)
+	{
+		IMLUsedRegisters registersUsed;
+		seg.imlList[i].CheckRegisterUsage(&registersUsed);
+		registersUsed.ForEachReadGPR([&](IMLReg reg) {
+			count += (reg.GetRegID() == regId) ? 1 : 0;
+		});
+	}
+	return count;
+}
+
+// move instruction from one index to another
+// instruction will be inserted before the instruction at targetIndex
+// returns the new instruction index of the moved instruction
+sint32 IMLUtil_MoveInstructionTo(IMLSegment& seg, sint32 initialIndex, sint32 targetIndex)
+{
+	cemu_assert_debug(initialIndex != targetIndex);
+	IMLInstruction temp = seg.imlList[initialIndex];
+	if (initialIndex < targetIndex)
+	{
+		cemu_assert_debug(targetIndex > 0);
+		targetIndex--;
+		for(size_t i=initialIndex; i regsNeeded = regIoAnalysis.GetRegistersNeededAtEndOfSegment(seg); // NOTE(review): this line looks truncated - the rest of IMLUtil_MoveInstructionTo and the start of the dead code elimination function (whose body follows) seem to be missing; restore from the original source
+
+	// start with suffix instruction
+	if(seg.HasSuffixInstruction())
+	{
+		IMLInstruction& imlInstruction = seg.imlList[seg.GetSuffixInstructionIndex()];
+		IMLUsedRegisters registersUsed;
+		imlInstruction.CheckRegisterUsage(&registersUsed);
+		registersUsed.ForEachWrittenGPR([&](IMLReg reg) {
+			regsNeeded.erase(reg.GetRegID());
+		});
+		registersUsed.ForEachReadGPR([&](IMLReg reg) {
+			regsNeeded.insert(reg.GetRegID());
+		});
+	}
+	// iterate instructions backwards
+	for (sint32 i = seg.imlList.size() - (seg.HasSuffixInstruction() ? 2:1); i >= 0; i--)
+	{
+		IMLInstruction& imlInstruction = seg.imlList[i];
+		IMLUsedRegisters registersUsed;
+		imlInstruction.CheckRegisterUsage(&registersUsed);
+		// register written -> removed from regsNeeded (the value from before this instruction is not needed)
+		// register read -> added to regsNeeded (the value from before this instruction is needed)
+
+		// check if this instruction only writes registers which will never be read
+		bool onlyWritesRedundantRegisters = true;
+		registersUsed.ForEachWrittenGPR([&](IMLReg reg) {
+			if (regsNeeded.contains(reg.GetRegID()))
+				onlyWritesRedundantRegisters = false;
+		});
+		// update regsNeeded for the instructions above this one: writes are applied first, then reads
+		registersUsed.ForEachWrittenGPR([&](IMLReg reg) {
+			regsNeeded.erase(reg.GetRegID());
+		});
+		registersUsed.ForEachReadGPR([&](IMLReg reg) {
+			regsNeeded.insert(reg.GetRegID());
+		});
+		if(!imlInstruction.HasSideEffects() && onlyWritesRedundantRegisters)
+		{
+			imlInstruction.make_no_op();
+		}
+	}
+}
+
+void IMLOptimizerX86_SubstituteCJumpForEflagsJump(IMLOptimizerRegIOAnalysis& regIoAnalysis, IMLSegment& seg)
+{
+ // convert and optimize bool condition jumps to eflags condition jumps
+ // - Moves eflag setter (e.g. cmp) closer to eflags consumer (conditional jump) if necessary. If not possible but required then exit early
+ // - Since we only rely on eflags, the boolean register can be optimized out if DCE considers it unused
+ // - Further detect and optimize patterns like DEC + CMP + JCC into fused ops (todo)
+
+ // check if this segment ends with a conditional jump
+ if(!seg.HasSuffixInstruction())
+ return;
+ sint32 cjmpInstIndex = seg.GetSuffixInstructionIndex();
+ if(cjmpInstIndex < 0)
+ return;
+ IMLInstruction& cjumpInstr = seg.imlList[cjmpInstIndex];
+ if( cjumpInstr.type != PPCREC_IML_TYPE_CONDITIONAL_JUMP )
+ return;
+ IMLReg regCondBool = cjumpInstr.op_conditional_jump.registerBool;
+ bool invertedCondition = !cjumpInstr.op_conditional_jump.mustBeTrue;
+ // find the instruction which sets the bool (looks back at most 20 instructions from the one before the jump)
+ sint32 cmpInstrIndex = IMLUtil_FindInstructionWhichWritesRegister(seg, cjmpInstIndex-1, regCondBool, 20);
+ if(cmpInstrIndex < 0)
+ return;
+ // check if its an instruction combo which can be optimized (currently only cmp + cjump) and get the condition
+ IMLInstruction& condSetterInstr = seg.imlList[cmpInstrIndex];
+ IMLCondition cond;
+ if(condSetterInstr.type == PPCREC_IML_TYPE_COMPARE)
+ cond = condSetterInstr.op_compare.cond;
+ else if(condSetterInstr.type == PPCREC_IML_TYPE_COMPARE_S32)
+ cond = condSetterInstr.op_compare_s32.cond;
+ else
+ return;
+ // check if instructions inbetween modify eflags (scanned backwards from the jump towards the compare)
+ sint32 indexEflagsSafeStart = -1; // index of the first instruction which does not modify eflags up to cjump
+ for(sint32 i = cjmpInstIndex-1; i > cmpInstrIndex; i--)
+ {
+ if(IMLOptimizerX86_ModifiesEFlags(seg.imlList[i]))
+ {
+ indexEflagsSafeStart = i+1;
+ break;
+ }
+ }
+ if(indexEflagsSafeStart >= 0)
+ {
+ cemu_assert(indexEflagsSafeStart > 0);
+ // there are eflags-modifying instructions inbetween the bool setter and cjump
+ // try to move the eflags setter close enough to the cjump (to indexEflagsSafeStart)
+ bool canMove = IMLUtil_CanMoveInstructionTo(seg, cmpInstrIndex, indexEflagsSafeStart);
+ if(!canMove)
+ {
+ return;
+ }
+ else
+ {
+ cmpInstrIndex = IMLUtil_MoveInstructionTo(seg, cmpInstrIndex, indexEflagsSafeStart); // condSetterInstr must not be used after this point; cmpInstrIndex now holds the new position
+ }
+ }
+ // we can turn the jump into an eflags jump
+ cjumpInstr.make_x86_eflags_jcc(cond, invertedCondition);
+ // NOTE(review): the jump was rewritten just above; whether it still counts as a read of regCondBool in the range below depends on CheckRegisterUsage (not visible here) - confirm the "> 1" threshold
+ if (IMLUtil_CountRegisterReadsInRange(seg, cmpInstrIndex, cjmpInstIndex, regCondBool.GetRegID()) > 1 || regIoAnalysis.IsRegisterNeededAtEndOfSegment(seg, regCondBool.GetRegID()))
+ return; // bool register is used beyond the CMP, we can't drop it
+ // rewrite the compare into a PPCREC_IML_OP_X86_CMP operation, which carries no bool result register
+ auto& cmpInstr = seg.imlList[cmpInstrIndex];
+ cemu_assert_debug(cmpInstr.type == PPCREC_IML_TYPE_COMPARE || cmpInstr.type == PPCREC_IML_TYPE_COMPARE_S32);
+ if(cmpInstr.type == PPCREC_IML_TYPE_COMPARE)
+ {
+ IMLReg regA = cmpInstr.op_compare.regA; // operands are copied out before make_r_r overwrites the instruction
+ IMLReg regB = cmpInstr.op_compare.regB;
+ seg.imlList[cmpInstrIndex].make_r_r(PPCREC_IML_OP_X86_CMP, regA, regB);
+ }
+ else
+ {
+ IMLReg regA = cmpInstr.op_compare_s32.regA; // operands are copied out before make_r_s32 overwrites the instruction
+ sint32 val = cmpInstr.op_compare_s32.immS32;
+ seg.imlList[cmpInstrIndex].make_r_s32(PPCREC_IML_OP_X86_CMP, regA, val);
+ }
+
+}
+
+void IMLOptimizer_StandardOptimizationPassForSegment(IMLOptimizerRegIOAnalysis& regIoAnalysis, IMLSegment& seg)
+{
+	IMLOptimizer_RemoveDeadCodeFromSegment(regIoAnalysis, seg); // architecture independent step, always executed first
+
+#if defined(ARCH_X86_64)
+	// x86 only: has to stay behind the register based passes because it introduces eflags dependencies which register dependency analysis cannot see
+	IMLOptimizerX86_SubstituteCJumpForEflagsJump(regIoAnalysis, seg);
+#endif
+}
+
+void IMLOptimizer_StandardOptimizationPass(ppcImlGenContext_t& ppcImlGenContext)
+{
+	IMLOptimizerRegIOAnalysis ioAnalysis{ppcImlGenContext.segmentList2, ppcImlGenContext.GetMaxRegId()};
+	ioAnalysis.ComputeDepedencies(); // computed once for the whole function, then shared by all per-segment passes
+	for (IMLSegment* segment : ppcImlGenContext.segmentList2)
+	{
+		IMLOptimizer_StandardOptimizationPassForSegment(ioAnalysis, *segment);
+	}
+}
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp
new file mode 100644
index 00000000..935e61ac
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp
@@ -0,0 +1,2204 @@
+#include "IML.h"
+
+#include "../PPCRecompiler.h"
+#include "../PPCRecompilerIml.h"
+#include "IMLRegisterAllocator.h"
+#include "IMLRegisterAllocatorRanges.h"
+
+#include "../BackendX64/BackendX64.h"
+#ifdef __aarch64__
+#include "../BackendAArch64/BackendAArch64.h"
+#endif
+
+#include
+#include
+
+#include "Common/cpu_features.h"
+
+#define DEBUG_RA_EXTRA_VALIDATION 0 // if set to non-zero, additional expensive validation checks will be performed
+#define DEBUG_RA_INSTRUCTION_GEN 0
+
+struct IMLRARegAbstractLiveness // preliminary liveness info. One entry per register and segment
+{
+	IMLRARegAbstractLiveness(IMLRegFormat regBaseFormat, sint32 usageStart, sint32 usageEnd)
+		: usageStart(usageStart), usageEnd(usageEnd), regBaseFormat(regBaseFormat) {} // listed in declaration order, which is the order the members are actually initialized in
+	// grows the half-open range [usageStart, usageEnd) so that it includes the instruction at index
+	void TrackInstruction(sint32 index)
+	{
+		usageStart = std::min(usageStart, index);
+		usageEnd = std::max(usageEnd, index + 1); // exclusive index
+	}
+
+	sint32 usageStart; // lower bound of the tracked instruction indices (inclusive)
+	sint32 usageEnd; // upper bound of the tracked instruction indices (exclusive)
+	bool isProcessed{false};
+	IMLRegFormat regBaseFormat;
+};
+
+struct IMLRegisterAllocatorContext // bundles the allocator parameters, the generator context and the per-register / per-segment lookup tables
+{
+ IMLRegisterAllocatorParameters* raParam;
+ ppcImlGenContext_t* deprGenContext; // deprecated. Try to decouple IMLRA from other parts of IML/PPCRec
+
+ std::unordered_map regIdToBaseFormat; // looked up by IMLRegID and yields an IMLRegFormat (see GetBaseFormatByRegId). NOTE(review): the template arguments appear to be missing in this copy of the patch
+ // first pass
+ std::vector> perSegmentAbstractRanges; // one map per segment, indexed by IMLSegment::momentaryIndex. NOTE(review): the template arguments appear to be truncated in this copy of the patch
+
+ // helper methods
+ inline std::unordered_map& GetSegmentAbstractRangeMap(IMLSegment* imlSegment) // returns the map belonging to imlSegment; the index is not bounds checked here
+ {
+ return perSegmentAbstractRanges[imlSegment->momentaryIndex];
+ }
+
+ inline IMLRegFormat GetBaseFormatByRegId(IMLRegID regId) const // regId must be present in regIdToBaseFormat; this is only verified by a debug assert
+ {
+ auto it = regIdToBaseFormat.find(regId);
+ cemu_assert_debug(it != regIdToBaseFormat.cend());
+ return it->second;
+ }
+};
+
+struct IMLFixedRegisters // fixed register requirements of a single instruction, filled in by GetInstructionFixedRegisters()
+{
+ struct Entry // pairs a virtual register with the set of physical registers it may occupy
+ {
+ Entry(IMLReg reg, IMLPhysRegisterSet physRegSet)
+ : reg(reg), physRegSet(physRegSet) {}
+
+ IMLReg reg; // may be IMLREG_INVALID, in which case physRegSet acts as a list of reserved registers
+ IMLPhysRegisterSet physRegSet;
+ };
+ boost::container::small_vector listInput; // fixed register requirements for instruction input edge. NOTE(review): the template arguments appear to be missing in this copy of the patch
+ boost::container::small_vector listOutput; // fixed register requirements for instruction output edge
+};
+
+static void SetupCallingConvention(const IMLInstruction* instruction, IMLFixedRegisters& fixedRegs, const IMLPhysReg intParamToPhysReg[3], const IMLPhysReg floatParamToPhysReg[3], const IMLPhysReg intReturnPhysReg, const IMLPhysReg floatReturnPhysReg, IMLPhysRegisterSet volatileRegisters)
+{
+	// Turns the operands of a call instruction (instruction->op_call_imm) into fixed register requirements:
+	//  - every valid parameter is pinned on the input edge to the next parameter register of its class (integer or float)
+	//  - a valid return register is pinned on the output edge to the integer or float return register
+	//  - the volatile registers, minus the register used for a valid return value, are reserved on the output edge
+	// volatileRegisters is received by value, so removing the return register from it does not affect the caller.
+	sint32 intParamCount = 0, floatParamCount = 0;
+	auto bindParameter = [&](IMLReg paramReg) {
+		if (!paramReg.IsValid())
+			return; // unused parameter slot
+		const IMLRegFormat paramFormat = paramReg.GetBaseFormat();
+		const bool isIntParam = paramFormat == IMLRegFormat::I64;
+		if (!isIntParam && paramFormat != IMLRegFormat::F64)
+		{
+			cemu_assert_suspicious(); // only I64 and F64 parameters are handled
+			return;
+		}
+		// parameter registers are handed out in order, counted separately per register class
+		IMLPhysRegisterSet paramRegSet;
+		if (isIntParam)
+			paramRegSet.SetAvailable(intParamToPhysReg[intParamCount++]);
+		else
+			paramRegSet.SetAvailable(floatParamToPhysReg[floatParamCount++]);
+		fixedRegs.listInput.emplace_back(paramReg, paramRegSet);
+	};
+	bindParameter(instruction->op_call_imm.regParam0);
+	bindParameter(instruction->op_call_imm.regParam1);
+	bindParameter(instruction->op_call_imm.regParam2);
+	// return value
+	const IMLReg returnReg = instruction->op_call_imm.regReturn;
+	if (returnReg.IsValid())
+	{
+		IMLPhysReg returnPhysReg = floatReturnPhysReg; // every non-integer format uses the float return register
+		switch (returnReg.GetBaseFormat())
+		{
+		case IMLRegFormat::I64: case IMLRegFormat::I32: case IMLRegFormat::I16: case IMLRegFormat::I8:
+			returnPhysReg = intReturnPhysReg;
+			break;
+		default:
+			break;
+		}
+		IMLPhysRegisterSet returnRegSet;
+		returnRegSet.SetAvailable(returnPhysReg);
+		volatileRegisters.SetReserved(returnPhysReg);
+		fixedRegs.listOutput.emplace_back(returnReg, returnRegSet);
+	}
+	// block volatile registers from being used on the output edge, this makes the register allocator store them during the call
+	fixedRegs.listOutput.emplace_back(IMLREG_INVALID, volatileRegisters);
+}
+
+#if defined(__aarch64__)
+// aarch64
+static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs)
+{
+	// GetInstructionFixedRegisters() constrains virtual registers to specific physical registers for instructions which need it.
+	// On x86 this covers instructions like SHL , CL where the CL register is hardwired. On aarch it's probably only necessary for setting up the calling convention
+	fixedRegs.listInput.clear();
+	fixedRegs.listOutput.clear();
+	if (instruction->type != PPCREC_IML_TYPE_CALL_IMM)
+		return;
+
+	const IMLPhysReg intParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_GPR_BASE + 1, IMLArchAArch64::PHYSREG_GPR_BASE + 2};
+	const IMLPhysReg floatParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_FPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 1, IMLArchAArch64::PHYSREG_FPR_BASE + 2};
+	IMLPhysRegisterSet callerSavedRegs;
+	for (int gpr = 0; gpr < 18; gpr++) // x0 to x17 are volatile
+		callerSavedRegs.SetAvailable(IMLArchAArch64::PHYSREG_GPR_BASE + gpr);
+	// v0-v7 & v16-v31 are volatile. For v8-v15 only the high 64 bits are volatile.
+	for (int fpr = 0; fpr < 32; fpr++)
+	{
+		if (fpr < 8 || fpr >= 16)
+			callerSavedRegs.SetAvailable(IMLArchAArch64::PHYSREG_FPR_BASE + fpr);
+	}
+	SetupCallingConvention(instruction, fixedRegs, intParamToPhysReg, floatParamToPhysReg, IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 0, callerSavedRegs);
+}
+#else
+// x86-64
+static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs)
+{
+	// Reports which operands of the instruction are tied to specific physical registers on x86-64 (both lists stay empty when there are none)
+	fixedRegs.listInput.clear();
+	fixedRegs.listOutput.clear();
+	if (instruction->type == PPCREC_IML_TYPE_R_R_R)
+	{
+		const auto op = instruction->operation;
+		const bool isRegisterShift = op == PPCREC_IML_OP_LEFT_SHIFT || op == PPCREC_IML_OP_RIGHT_SHIFT_S || op == PPCREC_IML_OP_RIGHT_SHIFT_U;
+		if (isRegisterShift && !g_CPUFeatures.x86.bmi2)
+		{
+			// without BMI2, regB of a shift has to be placed in ECX
+			IMLPhysRegisterSet ecxOnly;
+			ecxOnly.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_ECX);
+			fixedRegs.listInput.emplace_back(instruction->op_r_r_r.regB, ecxOnly);
+		}
+		return;
+	}
+	if (instruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
+	{
+		IMLPhysRegisterSet eaxOnly;
+		eaxOnly.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_EAX);
+		fixedRegs.listInput.emplace_back(IMLREG_INVALID, eaxOnly); // none of the inputs may use EAX
+		fixedRegs.listOutput.emplace_back(instruction->op_atomic_compare_store.regBoolOut, eaxOnly); // but we output to EAX
+		return;
+	}
+	if (instruction->type != PPCREC_IML_TYPE_CALL_IMM)
+		return;
+	const IMLPhysReg intParamToPhysReg[3] = {IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8};
+	const IMLPhysReg floatParamToPhysReg[3] = {IMLArchX86::PHYSREG_FPR_BASE + 0, IMLArchX86::PHYSREG_FPR_BASE + 1, IMLArchX86::PHYSREG_FPR_BASE + 2};
+	IMLPhysRegisterSet callerSavedRegs;
+	callerSavedRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RAX);
+	callerSavedRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX);
+	callerSavedRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX);
+	callerSavedRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8);
+	callerSavedRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R9);
+	callerSavedRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R10);
+	callerSavedRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R11);
+	// YMM0-YMM5 are volatile. For YMM6-YMM15 only the upper 128 bits are volatile which we dont use
+	for (int fpr = 0; fpr < 6; fpr++)
+		callerSavedRegs.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + fpr);
+	SetupCallingConvention(instruction, fixedRegs, intParamToPhysReg, floatParamToPhysReg, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_EAX, IMLArchX86::PHYSREG_FPR_BASE + 0, callerSavedRegs);
+}
+#endif
+
+uint32 IMLRA_GetNextIterationIndex()
+{
+	// hands out a new id on every call, backed by a plain function-local static counter; the first call returns 1
+	static uint32 s_iterationCounter = 0;
+	return ++s_iterationCounter;
+}
+
+bool _detectLoop(IMLSegment* currentSegment, sint32 depth, uint32 iterationIndex, IMLSegment* imlSegmentLoopBase) // recursive helper of IMLRA_DetectLoop: returns true if imlSegmentLoopBase is reachable from currentSegment via successors with a higher momentaryIndex; segments found on such a path get their loopDepth incremented
+{
+ if (currentSegment == imlSegmentLoopBase)
+ return true; // walked back to the segment the search started from
+ if (currentSegment->raInfo.lastIterationIndex == iterationIndex)
+ return currentSegment->raInfo.isPartOfProcessedLoop; // already visited during this search, reuse the stored result
+ if (depth >= 9)
+ return false; // recursion limit reached, deeper paths are not explored
+ currentSegment->raInfo.lastIterationIndex = iterationIndex; // mark as visited for this search
+ currentSegment->raInfo.isPartOfProcessedLoop = false;
+
+ if (currentSegment->nextSegmentIsUncertain)
+ return false;
+ if (currentSegment->nextSegmentBranchNotTaken)
+ {
+ if (currentSegment->nextSegmentBranchNotTaken->momentaryIndex > currentSegment->momentaryIndex) // only successors with a higher momentaryIndex are followed
+ {
+ currentSegment->raInfo.isPartOfProcessedLoop |= _detectLoop(currentSegment->nextSegmentBranchNotTaken, depth + 1, iterationIndex, imlSegmentLoopBase);
+ }
+ }
+ if (currentSegment->nextSegmentBranchTaken)
+ {
+ if (currentSegment->nextSegmentBranchTaken->momentaryIndex > currentSegment->momentaryIndex) // same restriction as for the not-taken successor
+ {
+ currentSegment->raInfo.isPartOfProcessedLoop |= _detectLoop(currentSegment->nextSegmentBranchTaken, depth + 1, iterationIndex, imlSegmentLoopBase);
+ }
+ }
+ if (currentSegment->raInfo.isPartOfProcessedLoop)
+ currentSegment->loopDepth++; // at least one successor path leads to the loop base
+ return currentSegment->raInfo.isPartOfProcessedLoop;
+}
+
+void IMLRA_DetectLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegmentLoopBase)
+{
+	// each search runs under a fresh id, which _detectLoop uses to recognize segments it already visited during this search
+	const uint32 searchId = IMLRA_GetNextIterationIndex();
+	imlSegmentLoopBase->raInfo.lastIterationIndex = searchId;
+	IMLSegment* const branchTarget = imlSegmentLoopBase->nextSegmentBranchTaken;
+	if (_detectLoop(branchTarget, 0, searchId, imlSegmentLoopBase))
+		imlSegmentLoopBase->loopDepth++; // the walk from the branch target led back to the base segment
+}
+
+void IMLRA_IdentifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
+{
+	// Classifies the branch-taken edge of imlSegment:
+	//  - a branch back to the segment itself is a tight loop and raises loopDepth right away
+	//  - a branch to a segment with a lower momentaryIndex is a potential complex loop and is handed to IMLRA_DetectLoop
+	//  - anything else (including an uncertain successor) leaves the segment untouched
+	if (imlSegment->nextSegmentIsUncertain)
+		return;
+	IMLSegment* const branchTarget = imlSegment->nextSegmentBranchTaken;
+	if (!branchTarget)
+		return;
+
+	if (branchTarget == imlSegment)
+		imlSegment->loopDepth++;
+	else if (branchTarget->momentaryIndex < imlSegment->momentaryIndex)
+		IMLRA_DetectLoop(ppcImlGenContext, imlSegment);
+}
+
+#define SUBRANGE_LIST_SIZE (128)
+
+sint32 IMLRA_CountDistanceUntilNextUse(raLivenessRange* subrange, raInstructionEdge startPosition)
+{
+	// returns the raw edge distance from startPosition to the first access location of subrange at or after startPosition
+	for (auto& accessLoc : subrange->list_accessLocations)
+	{
+		if (!(accessLoc.pos >= startPosition))
+			continue;
+		cemu_assert_debug(accessLoc.IsRead() != accessLoc.IsWrite()); // an access location can be either read or write
+		cemu_assert_debug(!startPosition.ConnectsToPreviousSegment() && !startPosition.ConnectsToNextSegment());
+		return accessLoc.pos.GetRaw() - startPosition.GetRaw();
+	}
+	// no further access: report a constant that exceeds any distance inside a segment of the asserted maximum size
+	cemu_assert_debug(subrange->imlSegment->imlList.size() < 10000);
+	return 10001 * 2;
+}
+
+// returns the raw edge distance from startPosition to the first fixed register requirement of range that lies on or after startPosition and permits physRegister
+// if there is no such requirement, hasFixedAccess is set to false and the distance to the end of the range is returned
+sint32 IMLRA_CountDistanceUntilFixedRegUsageInRange(IMLSegment* imlSegment, raLivenessRange* range, raInstructionEdge startPosition, sint32 physRegister, bool& hasFixedAccess)
+{
+	hasFixedAccess = false;
+	cemu_assert_debug(startPosition.IsInstructionIndex());
+	for (auto& requirement : range->list_fixedRegRequirements)
+	{
+		const bool isOnOrAfterStart = !(requirement.pos < startPosition);
+		if (!isOnOrAfterStart || !requirement.allowedReg.IsAvailable(physRegister))
+			continue;
+		hasFixedAccess = true;
+		return requirement.pos.GetRaw() - startPosition.GetRaw();
+	}
+	// no matching requirement was found
+	cemu_assert_debug(range->interval.end.IsInstructionIndex());
+	return range->interval.end.GetRaw() - startPosition.GetRaw();
+}
+
+sint32 IMLRA_CountDistanceUntilFixedRegUsage(IMLSegment* imlSegment, raInstructionEdge startPosition, sint32 maxDistance, IMLRegID ourRegId, sint32 physRegister) // scans the instruction edges from startPosition (up to maxDistance, clamped to lastPos2) and returns the raw distance to the first edge at which physRegister is claimed by a fixed register entry that does not belong to ourRegId; returns the distance to the end of the scan if there is none
+{
+ cemu_assert_debug(startPosition.IsInstructionIndex());
+ raInstructionEdge lastPos2;
+ lastPos2.Set(imlSegment->imlList.size(), false); // upper clamp for the scan, built from the instruction count
+
+ raInstructionEdge endPos;
+ endPos = startPosition + maxDistance;
+ if (endPos > lastPos2)
+ endPos = lastPos2;
+ IMLFixedRegisters fixedRegs;
+ if (startPosition.IsOnOutputEdge())
+ GetInstructionFixedRegisters(imlSegment->imlList.data() + startPosition.GetInstructionIndex(), fixedRegs); // the loop only refreshes fixedRegs on input edges, so a scan starting on an output edge has to fetch them here
+ for (raInstructionEdge currentPos = startPosition; currentPos <= endPos; ++currentPos) // NOTE(review): endPos is inclusive; if it can be an input edge whose instruction index equals imlList.size(), the call below would read one element past the end of imlList - confirm against raInstructionEdge semantics
+ {
+ if (currentPos.IsOnInputEdge())
+ {
+ GetInstructionFixedRegisters(imlSegment->imlList.data() + currentPos.GetInstructionIndex(), fixedRegs);
+ }
+ auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput;
+ for (auto& fixedRegLoc : fixedRegAccess)
+ {
+ if (fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId) // entries of our own register are not a conflict
+ {
+ cemu_assert_debug(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.physRegSet.HasExactlyOneAvailable()); // this whole function only makes sense when there is only one fixed register, otherwise there are extra permutations to consider. Except for IMLREG_INVALID which is used to indicate reserved registers
+ if (fixedRegLoc.physRegSet.IsAvailable(physRegister))
+ return currentPos.GetRaw() - startPosition.GetRaw();
+ }
+ }
+ }
+ return endPos.GetRaw() - startPosition.GetRaw();
+}
+
+// count how many instructions there are until physRegister is used by any subrange or reserved for any fixed register requirement (returns 0 if register is in use at startIndex)
+sint32 PPCRecRA_countDistanceUntilNextLocalPhysRegisterUse(IMLSegment* imlSegment, raInstructionEdge startPosition, sint32 physRegister)
+{
+	cemu_assert_debug(startPosition.IsInstructionIndex());
+	// start with the distance to the end of the segment (the instruction count is doubled to get raw edge units)
+	sint32 closestUse = (sint32)imlSegment->imlList.size() * 2 - startPosition.GetRaw();
+	// walk every range of the segment and keep the smallest distance to one that occupies physRegister
+	for (raLivenessRange* range = imlSegment->raInfo.linkedList_allSubranges; range; range = range->link_allSegmentRanges.next)
+	{
+		if (range->GetPhysicalRegister() != physRegister)
+		{
+			// this range is not assigned to physRegister
+			continue;
+		}
+		if (range->interval.ContainsEdge(startPosition))
+		{
+			// the register is occupied right at startPosition
+			return 0;
+		}
+		if (range->interval.end < startPosition)
+			continue; // range ends before startPosition
+		cemu_assert_debug(startPosition <= range->interval.start);
+		const sint32 distanceToRange = range->interval.start.GetRaw() - startPosition.GetRaw();
+		if (distanceToRange < closestUse)
+			closestUse = distanceToRange;
+	}
+	return closestUse;
+}
+
+struct IMLRALivenessTimeline // keeps the set of currently active liveness ranges plus the ranges retired by the most recent ExpireRanges() call
+{
+ IMLRALivenessTimeline()
+ {
+ }
+
+ // manually add an active range
+ void AddActiveRange(raLivenessRange* subrange)
+ {
+ activeRanges.emplace_back(subrange);
+ }
+
+ void ExpireRanges(raInstructionEdge expireUpTo) // moves every active range whose interval ends before expireUpTo into expiredRanges; the previous content of expiredRanges is discarded
+ {
+ expiredRanges.clear();
+ size_t count = activeRanges.size();
+ for (size_t f = 0; f < count; f++)
+ {
+ raLivenessRange* liverange = activeRanges[f];
+ if (liverange->interval.end < expireUpTo) // this was <= but since end is not inclusive we need to use <
+ {
+#ifdef CEMU_DEBUG_ASSERT
+ if (!expireUpTo.ConnectsToNextSegment() && (liverange->subrangeBranchTaken || liverange->subrangeBranchNotTaken))
+ assert_dbg(); // infinite subranges should not expire
+#endif
+ expiredRanges.emplace_back(liverange);
+ // remove entry by overwriting it with the last one; the order of activeRanges is not preserved
+ activeRanges[f] = activeRanges[count - 1];
+ f--; // revisit the same slot; for f == 0 this wraps around and the loop's f++ brings it back to 0
+ count--;
+ }
+ }
+ if (count != activeRanges.size())
+ activeRanges.resize(count);
+ }
+
+ std::span GetExpiredRanges() // view over the ranges expired by the last ExpireRanges() call. NOTE(review): the template argument of std::span appears to be missing in this copy of the patch
+ {
+ return {expiredRanges.data(), expiredRanges.size()};
+ }
+
+ std::span GetActiveRanges() // view over the currently active ranges
+ {
+ return {activeRanges.data(), activeRanges.size()};
+ }
+
+ raLivenessRange* GetActiveRangeByVirtualRegId(IMLRegID regId) // linear search, returns nullptr if no active range belongs to regId
+ {
+ for (auto& it : activeRanges)
+ if (it->virtualRegister == regId)
+ return it;
+ return nullptr;
+ }
+
+ raLivenessRange* GetActiveRangeByPhysicalReg(sint32 physReg) // linear search, returns nullptr if no active range is assigned to physReg
+ {
+ cemu_assert_debug(physReg >= 0);
+ for (auto& it : activeRanges)
+ if (it->physicalRegister == physReg)
+ return it;
+ return nullptr;
+ }
+
+ boost::container::small_vector activeRanges; // NOTE(review): the template arguments appear to be missing in this copy of the patch
+
+ private:
+ boost::container::small_vector expiredRanges;
+};
+
+// mark occupied registers by any overlapping range as unavailable in physRegSet
+void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange* range2, IMLPhysRegisterSet& physRegSet)
+{
+	// Walks every subrange of the cluster that range2 belongs to and compares it against the other ranges of the same segment.
+	// The physical register of each overlapping range is marked as reserved in physRegSet.
+	auto clusterRanges = range2->GetAllSubrangesInCluster();
+	for (auto& clusterMember : clusterRanges)
+	{
+		// head of the list holding all ranges of the segment this cluster member lives in
+		raLivenessRange* const firstInSegment = clusterMember->imlSegment->raInfo.linkedList_allSubranges;
+		for (raLivenessRange* other = firstInSegment; other; other = other->link_allSegmentRanges.next)
+		{
+			if (other == clusterMember)
+			{
+				// a range cannot collide with itself
+				continue;
+			}
+			if (!clusterMember->interval.IsOverlapping(other->interval))
+				continue;
+			// only non-negative register values are reserved (a negative value presumably means nothing is assigned yet)
+			const auto occupiedReg = other->GetPhysicalRegister();
+			if (occupiedReg >= 0)
+				physRegSet.SetReserved(occupiedReg);
+		}
+	}
+}
+
+bool _livenessRangeStartCompare(raLivenessRange* lhs, raLivenessRange* rhs) // ordering predicate: true if lhs starts before rhs; passed to std::sort by _sortSegmentAllSubrangesLinkedList
+{
+ return lhs->interval.start < rhs->interval.start;
+}
+
+void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment)
+{
+	// Re-links imlSegment->raInfo.linkedList_allSubranges so that the ranges are ordered by ascending interval start.
+	// The list is copied into a fixed-size scratch array (a segment with more than 4096 ranges trips the cemu_assert), sorted and linked back together.
+	raLivenessRange* subrangeList[4096 + 1]; // one extra slot for the nullptr terminator
+	sint32 count = 0;
+	// disassemble linked list
+	raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
+	while (subrangeItr)
+	{
+		cemu_assert(count < 4096);
+		subrangeList[count] = subrangeItr;
+		count++;
+		subrangeItr = subrangeItr->link_allSegmentRanges.next;
+	}
+	if (count == 0)
+	{
+		imlSegment->raInfo.linkedList_allSubranges = nullptr;
+		return;
+	}
+	// sort
+	std::sort(subrangeList, subrangeList + count, _livenessRangeStartCompare);
+	// reassemble linked list
+	subrangeList[count] = nullptr; // terminator, lets the code below read subrangeList[i + 1] for the last element
+	imlSegment->raInfo.linkedList_allSubranges = subrangeList[0];
+	subrangeList[0]->link_allSegmentRanges.prev = nullptr;
+	subrangeList[0]->link_allSegmentRanges.next = subrangeList[1];
+	for (sint32 i = 1; i < count; i++)
+	{
+		subrangeList[i]->link_allSegmentRanges.prev = subrangeList[i - 1];
+		subrangeList[i]->link_allSegmentRanges.next = subrangeList[i + 1];
+	}
+	// validate list
+#if DEBUG_RA_EXTRA_VALIDATION
+	sint32 count2 = 0;
+	subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
+	raInstructionEdge currentStartPosition;
+	currentStartPosition.SetRaw(RA_INTER_RANGE_START);
+	while (subrangeItr)
+	{
+		count2++;
+		if (subrangeItr->interval.start < currentStartPosition)
+			assert_dbg();
+		currentStartPosition = subrangeItr->interval.start;
+		subrangeItr = subrangeItr->link_allSegmentRanges.next;
+	}
+	if (count != count2)
+		assert_dbg();
+#endif
+}
+
+std::unordered_map& IMLRA_GetSubrangeMap(IMLSegment* imlSegment) // returns a reference to the per-virtual-register range map stored in the segment. NOTE(review): the template arguments of the return type appear to be missing in this copy of the patch
+{
+ return imlSegment->raInfo.linkedList_perVirtualRegister;
+}
+
+raLivenessRange* IMLRA_GetSubrange(IMLSegment* imlSegment, IMLRegID regId)
+{
+	// looks up the segment's entry for the virtual register regId; nullptr when the segment has none
+	auto& rangesByVirtualReg = imlSegment->raInfo.linkedList_perVirtualRegister;
+	const auto found = rangesByVirtualReg.find(regId);
+	return found != rangesByVirtualReg.end() ? found->second : nullptr;
+}
+
+struct raFixedRegRequirementWithVGPR
+{
+	raFixedRegRequirementWithVGPR(raInstructionEdge edge, IMLPhysRegisterSet permittedRegs, IMLRegID virtualRegId)
+		: pos(edge), allowedReg(permittedRegs), regId(virtualRegId) {}
+
+	raInstructionEdge pos; // instruction edge the requirement applies to
+	IMLPhysRegisterSet allowedReg; // physical registers named by the requirement
+	IMLRegID regId; // virtual register the requirement belongs to, or IMLRegID_INVALID for a list of reserved registers
+};
+
+std::vector IMLRA_BuildSegmentInstructionFixedRegList(IMLSegment* imlSegment) // collects the fixed register requirements of every instruction in the segment, ordered by instruction edge. NOTE(review): the template argument of std::vector appears to be missing in this copy of the patch
+{
+ std::vector frrList;
+ size_t index = 0;
+ while (index < imlSegment->imlList.size())
+ {
+ IMLFixedRegisters fixedRegs;
+ GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs);
+ raInstructionEdge pos;
+ pos.Set(index, true); // input edge of the instruction
+ for (auto& fixedRegAccess : fixedRegs.listInput)
+ {
+ frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid() ? fixedRegAccess.reg.GetRegID() : IMLRegID_INVALID);
+ }
+ pos = pos + 1; // advance to the output edge of the same instruction
+ for (auto& fixedRegAccess : fixedRegs.listOutput)
+ {
+ frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid() ? fixedRegAccess.reg.GetRegID() : IMLRegID_INVALID);
+ }
+ index++;
+ }
+ return frrList;
+}
+
+boost::container::small_vector IMLRA_GetRangeWithFixedRegReservationOverlappingPos(IMLSegment* imlSegment, raInstructionEdge pos, IMLPhysReg physReg) // returns all ranges of the segment which contain pos and whose allowed register set includes physReg. NOTE(review): the template arguments of small_vector appear to be missing in this copy of the patch
+{
+ boost::container::small_vector rangeList;
+ for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
+ {
+ if (!currentRange->interval.ContainsEdge(pos))
+ continue;
+ IMLPhysRegisterSet allowedRegs;
+ if (!currentRange->GetAllowedRegistersEx(allowedRegs))
+ continue; // GetAllowedRegistersEx() returned false, the range is not considered
+ if (allowedRegs.IsAvailable(physReg))
+ rangeList.emplace_back(currentRange);
+ }
+ return rangeList;
+}
+
+void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) // assigns physical registers to all ranges of the segment that have fixed register requirements, splitting ranges where needed. Runs in several passes over the segment's range list
+{
+ // first pass - iterate over all ranges with fixed register requirements and split them if they cross the segment border
+ // todo - this pass currently creates suboptimal results by splitting all ranges that cross the segment border if they have any fixed register requirement. This can be avoided in some cases
+ for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange;)
+ {
+ IMLPhysRegisterSet allowedRegs;
+ if(currentRange->list_fixedRegRequirements.empty())
+ {
+ currentRange = currentRange->link_allSegmentRanges.next;
+ continue; // since we run this pass for every segment we dont need to do global checks here for clusters which may not even have fixed register requirements
+ }
+ if (!currentRange->GetAllowedRegistersEx(allowedRegs))
+ {
+ currentRange = currentRange->link_allSegmentRanges.next;
+ continue;
+ }
+ if (currentRange->interval.ExtendsPreviousSegment() || currentRange->interval.ExtendsIntoNextSegment())
+ {
+ raLivenessRange* nextRange = currentRange->link_allSegmentRanges.next; // fetched before the explode call, which modifies currentRange
+ IMLRA_ExplodeRangeCluster(ppcImlGenContext, currentRange);
+ currentRange = nextRange;
+ continue;
+ }
+ currentRange = currentRange->link_allSegmentRanges.next;
+ }
+ // second pass - look for ranges with conflicting fixed register requirements and split these too (locally)
+ for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
+ {
+ IMLPhysRegisterSet allowedRegs;
+ if (currentRange->list_fixedRegRequirements.empty())
+ continue; // we dont need to check whole clusters because the pass above guarantees that there are no ranges with fixed register requirements that extend outside of this segment
+ if (!currentRange->GetAllowedRegistersEx(allowedRegs))
+ continue;
+ if (allowedRegs.HasAnyAvailable())
+ continue;
+ cemu_assert_unimplemented(); // a range whose requirements leave no register available is not handled yet
+ }
+ // third pass - assign fixed registers, split ranges if needed
+ std::vector frr = IMLRA_BuildSegmentInstructionFixedRegList(imlSegment);
+ std::unordered_map lastVGPR; // per physical register: the regId of the most recent fixed register entry that named it
+ for (size_t i = 0; i < frr.size(); i++)
+ {
+ raFixedRegRequirementWithVGPR& entry = frr[i];
+ // we currently only handle fixed register requirements with a single register
+ // with one exception: When regId is IMLRegID_INVALID then the entry acts as a list of reserved registers
+ cemu_assert_debug(entry.regId == IMLRegID_INVALID || entry.allowedReg.HasExactlyOneAvailable());
+ for (IMLPhysReg physReg = entry.allowedReg.GetFirstAvailableReg(); physReg >= 0; physReg = entry.allowedReg.GetNextAvailableReg(physReg + 1))
+ {
+ // check if the assigned vGPR has changed
+ bool vgprHasChanged = false;
+ auto it = lastVGPR.find(physReg);
+ if (it != lastVGPR.end())
+ vgprHasChanged = it->second != entry.regId;
+ else
+ vgprHasChanged = true;
+ lastVGPR[physReg] = entry.regId;
+
+ if (!vgprHasChanged)
+ continue;
+
+ boost::container::small_vector overlappingRanges = IMLRA_GetRangeWithFixedRegReservationOverlappingPos(imlSegment, entry.pos, physReg);
+ if (entry.regId != IMLRegID_INVALID)
+ cemu_assert_debug(!overlappingRanges.empty()); // there should always be at least one range that overlaps corresponding to the fixed register requirement, except for IMLRegID_INVALID which is used to indicate reserved registers
+
+ for (auto& range : overlappingRanges)
+ {
+ if (range->interval.start < entry.pos)
+ {
+ IMLRA_SplitRange(ppcImlGenContext, range, entry.pos, true);
+ }
+ }
+ }
+ }
+ // finally iterate ranges and assign fixed registers
+ for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
+ {
+ IMLPhysRegisterSet allowedRegs;
+ if (currentRange->list_fixedRegRequirements.empty())
+ continue; // we dont need to check whole clusters because the pass above guarantees that there are no ranges with fixed register requirements that extend outside of this segment
+ if (!currentRange->GetAllowedRegistersEx(allowedRegs))
+ {
+ cemu_assert_debug(currentRange->list_fixedRegRequirements.empty());
+ continue;
+ }
+ cemu_assert_debug(allowedRegs.HasExactlyOneAvailable());
+ currentRange->SetPhysicalRegister(allowedRegs.GetFirstAvailableReg());
+ }
+ // DEBUG - check for collisions and make sure all ranges with fixed register requirements got their physical register assigned
+#if DEBUG_RA_EXTRA_VALIDATION
+ for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
+ {
+ IMLPhysRegisterSet allowedRegs;
+ if (!currentRange->HasPhysicalRegister())
+ continue;
+ for (raLivenessRange* currentRange2 = imlSegment->raInfo.linkedList_allSubranges; currentRange2; currentRange2 = currentRange2->link_allSegmentRanges.next)
+ {
+ if (currentRange == currentRange2)
+ continue;
+ if (currentRange->interval.IsOverlapping(currentRange2->interval))
+ {
+ cemu_assert_debug(currentRange->GetPhysicalRegister() != currentRange2->GetPhysicalRegister());
+ }
+ }
+ }
+#endif
+}
+
+// we should not split ranges on instructions with tied registers (i.e. where a register encoded as a single parameter is both input and output)
+// otherwise the RA algorithm has to assign both ranges the same physical register (not supported yet) and the point of splitting to fit another range is nullified
+void IMLRA_MakeSafeSplitPosition(IMLSegment* imlSegment, raInstructionEdge& pos)
+{
+	cemu_assert_debug(pos.IsInstructionIndex());
+	if (!pos.IsOnOutputEdge())
+		return; // any position that is not on an output edge is treated as safe and left unchanged
+	pos = pos - 1; // the instruction itself is not inspected for now: every output edge is moved back by one edge
+}
+
+// convenience wrapper for IMLRA_MakeSafeSplitPosition
+void IMLRA_MakeSafeSplitDistance(IMLSegment* imlSegment, raInstructionEdge startPos, sint32& distance)
+{
+	cemu_assert_debug(startPos.IsInstructionIndex());
+	cemu_assert_debug(distance >= 0);
+	// translate the distance into an absolute edge, snap that edge to a safe split position, then translate it back
+	raInstructionEdge splitPos = startPos + distance;
+	IMLRA_MakeSafeSplitPosition(imlSegment, splitPos);
+	// snapping can move the edge in front of startPos, in which case the distance is clamped to zero
+	if (splitPos < startPos)
+		distance = 0;
+	else
+		distance = splitPos.GetRaw() - startPos.GetRaw();
+}
+
+static void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx);
+
+class RASpillStrategy // common interface of the spill strategies: each strategy computes a cost and can then be applied to a range
+{
+  public:
+	virtual ~RASpillStrategy() = default; // polymorphic base, so destruction through a base pointer has to be well defined
+	virtual void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) = 0; // carries out the strategy for currentRange
+	// returns the current cost of the strategy; INT_MAX is the reset value
+	sint32 GetCost()
+	{
+		return strategyCost;
+	}
+
+  protected:
+	void ResetCost() // puts the cost back to the reset value (INT_MAX)
+	{
+		strategyCost = INT_MAX;
+	}
+	sint32 strategyCost{INT_MAX}; // starts at the reset value so that GetCost() never reads an indeterminate value
+};
+
+class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy // strategy: take over the register of an active range that has no access for a while, by cutting a hole into that range
+{
+ public:
+ void Reset() // must be called before Evaluate(), which asserts on the reset state
+ {
+ localRangeHoleCutting.distance = -1;
+ localRangeHoleCutting.largestHoleSubrange = nullptr;
+ ResetCost();
+ }
+
+ void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs) // picks the cheapest candidate among the active ranges and stores it together with its cost; the cost stays at INT_MAX if no candidate qualifies
+ {
+ raInstructionEdge currentRangeStart = currentRange->interval.start;
+ sint32 requiredSize2 = currentRange->interval.GetPreciseDistance();
+ cemu_assert_debug(localRangeHoleCutting.distance == -1);
+ cemu_assert_debug(strategyCost == INT_MAX);
+ if (!currentRangeStart.ConnectsToPreviousSegment())
+ {
+ cemu_assert_debug(currentRangeStart.GetRaw() >= 0);
+ for (auto candidate : timeline.activeRanges)
+ {
+ if (candidate->interval.ExtendsIntoNextSegment())
+ continue;
+ // new checks (Oct 2024):
+ if (candidate == currentRange)
+ continue;
+ if (candidate->GetPhysicalRegister() < 0)
+ continue;
+ if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister()))
+ continue;
+
+ sint32 distance2 = IMLRA_CountDistanceUntilNextUse(candidate, currentRangeStart);
+ IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance2);
+ if (distance2 < 2)
+ continue; // hole is too small to be useful
+ cemu_assert_debug(currentRangeStart.IsInstructionIndex());
+ distance2 = std::min(distance2, imlSegment->imlList.size() * 2 - currentRangeStart.GetRaw()); // limit distance to end of segment. NOTE(review): the explicit template argument of std::min appears to be missing in this copy of the patch
+ // calculate split cost of candidate
+ sint32 cost = IMLRA_CalculateAdditionalCostAfterSplit(candidate, currentRangeStart + distance2);
+ // calculate additional split cost of currentRange if hole is not large enough
+ if (distance2 < requiredSize2)
+ {
+ cost += IMLRA_CalculateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance2);
+ // we also slightly increase cost in relation to the remaining length (in order to make the algorithm prefer larger holes)
+ cost += (requiredSize2 - distance2) / 10;
+ }
+ // compare cost with previous candidates
+ if (cost < strategyCost)
+ {
+ strategyCost = cost;
+ localRangeHoleCutting.distance = distance2;
+ localRangeHoleCutting.largestHoleSubrange = candidate;
+ }
+ }
+ }
+ }
+
+ void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override // splits the candidate chosen by Evaluate() at the start of currentRange and, if the hole is too short, also splits currentRange at the end of the hole
+ {
+ cemu_assert_debug(strategyCost != INT_MAX);
+ sint32 requiredSize2 = currentRange->interval.GetPreciseDistance();
+ raInstructionEdge currentRangeStart = currentRange->interval.start;
+
+ raInstructionEdge holeStartPosition = currentRangeStart;
+ raInstructionEdge holeEndPosition = currentRangeStart + localRangeHoleCutting.distance;
+ raLivenessRange* collisionRange = localRangeHoleCutting.largestHoleSubrange;
+
+ if (collisionRange->interval.start < holeStartPosition)
+ {
+ collisionRange = IMLRA_SplitRange(nullptr, collisionRange, holeStartPosition, true); // from here on collisionRange is the tail returned by the split and may be null
+ cemu_assert_debug(!collisionRange || collisionRange->interval.start >= holeStartPosition); // verify if splitting worked at all, tail must be on or after the split point
+ cemu_assert_debug(!collisionRange || collisionRange->interval.start >= holeEndPosition); // also verify that the trimmed hole is actually big enough
+ }
+ else
+ {
+ cemu_assert_unimplemented(); // we still need to trim?
+ }
+ // we may also have to cut the current range to fit partially into the hole
+ if (requiredSize2 > localRangeHoleCutting.distance)
+ {
+ raLivenessRange* tailRange = IMLRA_SplitRange(nullptr, currentRange, currentRangeStart + localRangeHoleCutting.distance, true);
+ if (tailRange)
+ {
+ cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers
+ tailRange->UnsetPhysicalRegister();
+ }
+ }
+ // verify that the hole is large enough
+ if (collisionRange)
+ {
+ cemu_assert_debug(!collisionRange->interval.IsOverlapping(currentRange->interval));
+ }
+ }
+
+ private:
+ struct
+ {
+ sint32 distance; // length of the chosen hole in raw edge units, -1 after Reset()
+ raLivenessRange* largestHoleSubrange; // candidate range selected by Evaluate(), nullptr after Reset()
+ } localRangeHoleCutting;
+};
+
+// Spill strategy: put the head of the current range into a physical register that is
+// currently unused and split off the remainder of the range where that register stops
+// being usable. The split-off tail loses its physical register.
+// (this is generally only a good choice when the current range is long but has few usages)
+class RASpillStrategy_AvailableRegisterHole : public RASpillStrategy
+{
+  public:
+	// Clears the cost and forgets any previously selected hole.
+	void Reset()
+	{
+		ResetCost();
+		availableRegisterHole.physRegister = -1;
+		availableRegisterHole.distance = -1;
+	}
+
+	// Searches all registers that are both unused (localAvailableRegsMask) and permitted
+	// (allowedRegs) for the cheapest hole starting at the beginning of currentRange.
+	// On success strategyCost, the chosen register and the hole size are stored.
+	void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& localAvailableRegsMask, const IMLPhysRegisterSet& allowedRegs)
+	{
+		const sint32 rangeLength = currentRange->interval.GetPreciseDistance();
+		raInstructionEdge rangeStart = currentRange->interval.start;
+		cemu_assert_debug(strategyCost == INT_MAX);
+		availableRegisterHole.physRegister = -1;
+		availableRegisterHole.distance = -1;
+		// only ranges with a non-negative raw start position are considered
+		if (rangeStart.GetRaw() < 0)
+			return;
+		if (!localAvailableRegsMask.HasAnyAvailable())
+			return;
+		for (sint32 physReg = localAvailableRegsMask.GetNextAvailableReg(0); physReg >= 0; physReg = localAvailableRegsMask.GetNextAvailableReg(physReg + 1))
+		{
+			if (!allowedRegs.IsAvailable(physReg))
+				continue;
+			// size of the potential hole for this register
+			sint32 holeSize = PPCRecRA_countDistanceUntilNextLocalPhysRegisterUse(imlSegment, rangeStart, physReg);
+			// some instructions may require the same register for another range, which limits the hole
+			const sint32 fixedRegLimit = IMLRA_CountDistanceUntilFixedRegUsage(imlSegment, rangeStart, holeSize, currentRange->GetVirtualRegister(), physReg);
+			if (fixedRegLimit < holeSize)
+				holeSize = fixedRegLimit;
+
+			IMLRA_MakeSafeSplitDistance(imlSegment, rangeStart, holeSize);
+			if (holeSize < 2)
+				continue;
+			// a hole covering the whole range should already have been picked up as a free register
+			cemu_assert_debug(holeSize < rangeLength);
+			// cost of the split itself ...
+			sint32 splitCost = IMLRA_CalculateAdditionalCostAfterSplit(currentRange, rangeStart + holeSize);
+			// ... plus a small penalty for the part that does not fit (prefer larger holes)
+			splitCost += ((rangeLength - holeSize) / 2) / 10;
+			if (splitCost >= strategyCost)
+				continue;
+			strategyCost = splitCost;
+			availableRegisterHole.distance = holeSize;
+			availableRegisterHole.physRegister = physReg;
+		}
+	}
+
+	// Splits currentRange at the end of the selected hole; the tail (if any) is unassigned.
+	void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override
+	{
+		cemu_assert_debug(strategyCost != INT_MAX);
+		raInstructionEdge rangeStart = currentRange->interval.start;
+		raLivenessRange* tailRange = IMLRA_SplitRange(nullptr, currentRange, rangeStart + availableRegisterHole.distance, true);
+		if (!tailRange)
+			return;
+		cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers
+		tailRange->UnsetPhysicalRegister();
+	}
+
+  private:
+	struct
+	{
+		sint32 physRegister; // register providing the hole, -1 if none selected
+		sint32 distance;     // size of hole, -1 if none selected
+	} availableRegisterHole;
+};
+
+// Spill strategy: explode an active range that extends into the next segment to free its
+// register; if the resulting hole is shorter than the current range, the current range is
+// additionally split and its tail unassigned.
+class RASpillStrategy_ExplodeRange : public RASpillStrategy
+{
+  public:
+	// Clears the cost and forgets any previously selected candidate.
+	void Reset()
+	{
+		ResetCost();
+		explodeRange.range = nullptr;
+		explodeRange.distance = -1;
+	}
+
+	// Picks the cheapest active range to explode. Candidates must extend into the next
+	// segment, differ from currentRange and hold a register permitted by allowedRegs.
+	void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs)
+	{
+		raInstructionEdge rangeStart = currentRange->interval.start;
+		if (rangeStart.ConnectsToPreviousSegment())
+			rangeStart.Set(0, true);
+		const sint32 rangeLength = currentRange->interval.GetPreciseDistance();
+		cemu_assert_debug(strategyCost == INT_MAX);
+		explodeRange.range = nullptr;
+		explodeRange.distance = -1;
+		for (auto candidate : timeline.activeRanges)
+		{
+			if (!candidate->interval.ExtendsIntoNextSegment())
+				continue;
+			// new checks (Oct 2024):
+			if (candidate == currentRange)
+				continue;
+			const auto candidatePhysReg = candidate->GetPhysicalRegister();
+			if (candidatePhysReg < 0 || !allowedRegs.IsAvailable(candidatePhysReg))
+				continue;
+
+			sint32 holeSize = IMLRA_CountDistanceUntilNextUse(candidate, rangeStart);
+			IMLRA_MakeSafeSplitDistance(imlSegment, rangeStart, holeSize);
+			if (holeSize < 2)
+				continue;
+			sint32 totalCost = IMLRA_CalculateAdditionalCostOfRangeExplode(candidate);
+			if (holeSize < rangeLength)
+			{
+				// hole is too small: account for splitting the current subrange as well
+				totalCost += IMLRA_CalculateAdditionalCostAfterSplit(currentRange, rangeStart + holeSize);
+				// small extra penalty for the remaining range (prefer larger holes)
+				totalCost += ((rangeLength - holeSize) / 2) / 10;
+			}
+			// keep the cheapest candidate seen so far
+			if (totalCost >= strategyCost)
+				continue;
+			strategyCost = totalCost;
+			explodeRange.distance = holeSize;
+			explodeRange.range = candidate;
+		}
+	}
+
+	// Explodes the selected range and, if the hole is shorter than currentRange, splits
+	// currentRange at the end of the hole and unassigns the tail.
+	void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override
+	{
+		raInstructionEdge rangeStart = currentRange->interval.start;
+		if (rangeStart.ConnectsToPreviousSegment())
+			rangeStart.Set(0, true);
+		const sint32 rangeLength = currentRange->interval.GetPreciseDistance();
+		IMLRA_ExplodeRangeCluster(nullptr, explodeRange.range);
+		if (rangeLength <= explodeRange.distance)
+			return; // the current range fits into the hole completely
+		raLivenessRange* tailRange = IMLRA_SplitRange(nullptr, currentRange, rangeStart + explodeRange.distance, true);
+		if (!tailRange)
+			return;
+		cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers
+		tailRange->UnsetPhysicalRegister();
+	}
+
+  private:
+	struct
+	{
+		raLivenessRange* range; // range selected for exploding, nullptr if none
+		sint32 distance;        // size of hole
+		// note: If we explode a range, we still have to check the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange
+	} explodeRange;
+};
+
+// Spill strategy used when the current range itself extends into following segments:
+// explode whichever range is cheapest, choosing among the active ranges that extend into
+// the next segment and the current range itself.
+class RASpillStrategy_ExplodeRangeInter : public RASpillStrategy
+{
+  public:
+	// Clears the cost and forgets any previously selected candidate.
+	void Reset()
+	{
+		ResetCost();
+		explodeRange.range = nullptr;
+		explodeRange.distance = -1;
+	}
+
+	// Selects the range with the lowest explode cost. Reset() must have been called first
+	// (checked by the debug asserts below).
+	void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs)
+	{
+		// explode the range with the least cost
+		cemu_assert_debug(strategyCost == INT_MAX);
+		cemu_assert_debug(explodeRange.range == nullptr && explodeRange.distance == -1);
+		for (auto candidate : timeline.activeRanges)
+		{
+			if (!candidate->interval.ExtendsIntoNextSegment())
+				continue;
+			// Only candidates that actually hold a register can clash with the current subrange.
+			// This also keeps a register index of -1 from ever being passed to IsAvailable();
+			// the current range is evaluated separately after the loop, so nothing is lost.
+			const auto candidatePhysReg = candidate->GetPhysicalRegister();
+			if (candidatePhysReg < 0)
+				continue;
+			// and also filter any that dont meet fixed register requirements
+			if (!allowedRegs.IsAvailable(candidatePhysReg))
+				continue;
+			const sint32 cost = IMLRA_CalculateAdditionalCostOfRangeExplode(candidate);
+			// compare with current best candidate for this strategy
+			if (cost < strategyCost)
+			{
+				strategyCost = cost;
+				explodeRange.distance = INT_MAX;
+				explodeRange.range = candidate;
+			}
+		}
+		// add current range as a candidate too
+		const sint32 ownCost = IMLRA_CalculateAdditionalCostOfRangeExplode(currentRange);
+		if (ownCost < strategyCost)
+		{
+			strategyCost = ownCost;
+			explodeRange.distance = INT_MAX;
+			explodeRange.range = currentRange;
+		}
+	}
+
+	// Explodes the range cluster chosen by Evaluate().
+	void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override
+	{
+		cemu_assert_debug(strategyCost != INT_MAX);
+		IMLRA_ExplodeRangeCluster(ctx, explodeRange.range);
+	}
+
+  private:
+	struct
+	{
+		raLivenessRange* range; // range selected for exploding, nullptr if none
+		sint32 distance;        // size of hole (set to INT_MAX once a candidate is selected)
+		// note: If we explode a range, we still have to check the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange
+	} explodeRange;
+};
+
+// Removes from candidatePhysRegSet every physical register that is claimed by a fixed
+// register requirement of another register (or of no register at all) anywhere within
+// the part of the segment that currentRange occupies.
+void IMLRA_FilterReservedFixedRegisterRequirementsForSegment(IMLRegisterAllocatorContext& ctx, raLivenessRange* currentRange, IMLPhysRegisterSet& candidatePhysRegSet)
+{
+	IMLSegment* seg = currentRange->imlSegment;
+	if (seg->imlList.empty())
+		return; // there can be no fixed register requirements if there are no instructions
+
+	// map edges that connect to neighbouring segments onto a position inside this segment
+	auto clampToSegment = [seg](raInstructionEdge edge) {
+		if (edge.ConnectsToPreviousSegment())
+			edge.SetRaw(0);
+		else if (edge.ConnectsToNextSegment())
+			edge.Set(seg->imlList.size() - 1, false);
+		return edge;
+	};
+	raInstructionEdge firstPos = clampToSegment(currentRange->interval.start);
+	raInstructionEdge lastPos = clampToSegment(currentRange->interval.end);
+	cemu_assert_debug(firstPos <= lastPos);
+
+	const IMLRegID ourRegId = currentRange->GetVirtualRegister();
+
+	IMLFixedRegisters fixedRegs;
+	// when starting on an output edge the loop below never sees the input edge of that
+	// instruction, so its fixed registers have to be fetched up front
+	if (firstPos.IsOnOutputEdge())
+		GetInstructionFixedRegisters(seg->imlList.data() + firstPos.GetInstructionIndex(), fixedRegs);
+	for (raInstructionEdge currentPos = firstPos; currentPos <= lastPos; ++currentPos)
+	{
+		const bool onInputEdge = currentPos.IsOnInputEdge();
+		if (onInputEdge)
+			GetInstructionFixedRegisters(seg->imlList.data() + currentPos.GetInstructionIndex(), fixedRegs);
+		auto& fixedRegAccess = onInputEdge ? fixedRegs.listInput : fixedRegs.listOutput;
+		for (auto& fixedRegLoc : fixedRegAccess)
+		{
+			// requirements on our own register do not reserve anything against us
+			if (!fixedRegLoc.reg.IsInvalid() && fixedRegLoc.reg.GetRegID() == ourRegId)
+				continue;
+			candidatePhysRegSet.RemoveRegisters(fixedRegLoc.physRegSet);
+		}
+	}
+}
+
+// Applies the per-segment fixed register filter to every subrange of the cluster that
+// currentRange belongs to. Stops early once no candidate register is left.
+void IMLRA_FilterReservedFixedRegisterRequirementsForCluster(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, raLivenessRange* currentRange, IMLPhysRegisterSet& candidatePhysRegSet)
+{
+	cemu_assert_debug(currentRange->imlSegment == imlSegment);
+	const bool spansMultipleSegments = currentRange->interval.ExtendsPreviousSegment() || currentRange->interval.ExtendsIntoNextSegment();
+	if (!spansMultipleSegments)
+	{
+		// purely local range, only this segment matters
+		IMLRA_FilterReservedFixedRegisterRequirementsForSegment(ctx, currentRange, candidatePhysRegSet);
+		return;
+	}
+	auto clusterRanges = currentRange->GetAllSubrangesInCluster();
+	for (auto& clusterRange : clusterRanges)
+	{
+		IMLRA_FilterReservedFixedRegisterRequirementsForSegment(ctx, clusterRange, candidatePhysRegSet);
+		if (!candidatePhysRegSet.HasAnyAvailable())
+			return;
+	}
+}
+
+// Assigns physical registers to the subranges of a single segment.
+// Subranges are visited in ascending start order. Each unassigned subrange gets the first
+// free register that does not conflict with active ranges, the rest of its cluster or
+// fixed register requirements. If no such register exists, the cheapest spill/split
+// strategy is applied and the function returns false immediately, without processing
+// the remaining subranges.
+// Returns true once every subrange of the segment has a physical register.
+bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
+{
+ // sort subranges ascending by start index
+ _sortSegmentAllSubrangesLinkedList(imlSegment);
+
+ IMLRALivenessTimeline livenessTimeline;
+ raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
+ raInstructionEdge lastInstructionEdge;
+ lastInstructionEdge.SetRaw(RA_INTER_RANGE_END);
+
+ // strategy objects are created once and reset before each use
+ struct
+ {
+ RASpillStrategy_LocalRangeHoleCutting localRangeHoleCutting;
+ RASpillStrategy_AvailableRegisterHole availableRegisterHole;
+ RASpillStrategy_ExplodeRange explodeRange;
+ // for ranges that connect to follow up segments:
+ RASpillStrategy_ExplodeRangeInter explodeRangeInter;
+ } strategy;
+
+ while (subrangeItr)
+ {
+ raInstructionEdge currentRangeStart = subrangeItr->interval.start; // used to be currentIndex before refactor
+ PPCRecRA_debugValidateSubrange(subrangeItr);
+
+ livenessTimeline.ExpireRanges((currentRangeStart > lastInstructionEdge) ? lastInstructionEdge : currentRangeStart); // expire up to currentIndex (inclusive), but exclude infinite ranges
+
+ // if subrange already has register assigned then add it to the active list and continue
+ if (subrangeItr->GetPhysicalRegister() >= 0)
+ {
+ // verify if register is actually available
+#if DEBUG_RA_EXTRA_VALIDATION
+ for (auto& liverangeItr : livenessTimeline.activeRanges)
+ {
+ // check for register mismatch
+ cemu_assert_debug(liverangeItr->GetPhysicalRegister() != subrangeItr->GetPhysicalRegister());
+ }
+#endif
+ livenessTimeline.AddActiveRange(subrangeItr);
+ subrangeItr = subrangeItr->link_allSegmentRanges.next;
+ continue;
+ }
+ // ranges with fixed register requirements should already have a phys register assigned
+ if (!subrangeItr->list_fixedRegRequirements.empty())
+ {
+ cemu_assert_debug(subrangeItr->HasPhysicalRegister());
+ }
+ // find free register for current subrangeItr and segment
+ IMLRegFormat regBaseFormat = ctx.GetBaseFormatByRegId(subrangeItr->GetVirtualRegister());
+ IMLPhysRegisterSet candidatePhysRegSet = ctx.raParam->GetPhysRegPool(regBaseFormat);
+ cemu_assert_debug(candidatePhysRegSet.HasAnyAvailable()); // no valid pool provided for this register type
+
+ IMLPhysRegisterSet allowedRegs = subrangeItr->GetAllowedRegisters(candidatePhysRegSet);
+ cemu_assert_debug(allowedRegs.HasAnyAvailable()); // if zero regs are available, then this range needs to be split to avoid mismatching register requirements (do this in the initial pass to keep the code here simpler)
+ candidatePhysRegSet &= allowedRegs;
+
+ // registers held by currently active ranges cannot be used
+ for (auto& liverangeItr : livenessTimeline.activeRanges)
+ {
+ cemu_assert_debug(liverangeItr->GetPhysicalRegister() >= 0);
+ candidatePhysRegSet.SetReserved(liverangeItr->GetPhysicalRegister());
+ }
+ // check intersections with other ranges and determine allowed registers
+ IMLPhysRegisterSet localAvailableRegsMask = candidatePhysRegSet; // mask of registers that are currently not used (does not include range checks in other segments)
+ if (candidatePhysRegSet.HasAnyAvailable())
+ {
+ // check for overlaps on a global scale (subrangeItr can be part of a larger range cluster across multiple segments)
+ PPCRecRA_MaskOverlappingPhysRegForGlobalRange(subrangeItr, candidatePhysRegSet);
+ }
+ // some target instructions may enforce specific registers (e.g. common on X86 where something like SHL reg, CL forces CL as the count register)
+ // we determine the list of allowed registers here
+ // this really only works if we assume single-register requirements (otherwise its better not to filter out early and instead allow register corrections later but we don't support this yet)
+ if (candidatePhysRegSet.HasAnyAvailable())
+ {
+ IMLRA_FilterReservedFixedRegisterRequirementsForCluster(ctx, imlSegment, subrangeItr, candidatePhysRegSet);
+ }
+ if (candidatePhysRegSet.HasAnyAvailable())
+ {
+ // use free register
+ subrangeItr->SetPhysicalRegisterForCluster(candidatePhysRegSet.GetFirstAvailableReg());
+ livenessTimeline.AddActiveRange(subrangeItr);
+ subrangeItr = subrangeItr->link_allSegmentRanges.next; // next
+ continue;
+ }
+ // there is no free register for the entire range
+ // evaluate different strategies of splitting ranges to free up another register or shorten the current range
+ strategy.localRangeHoleCutting.Reset();
+ strategy.availableRegisterHole.Reset();
+ strategy.explodeRange.Reset();
+ // cant assign register
+ // there might be registers available, we just can't use them due to range conflicts
+ RASpillStrategy* selectedStrategy = nullptr;
+ // keeps the strategy with the lowest cost; a cost of INT_MAX means the strategy is not applicable
+ auto SelectStrategyIfBetter = [&selectedStrategy](RASpillStrategy& newStrategy) {
+ if (newStrategy.GetCost() == INT_MAX)
+ return;
+ if (selectedStrategy == nullptr || newStrategy.GetCost() < selectedStrategy->GetCost())
+ selectedStrategy = &newStrategy;
+ };
+
+ if (!subrangeItr->interval.ExtendsIntoNextSegment())
+ {
+ // range ends in current segment, use local strategies
+ // evaluate strategy: Cut hole into local subrange
+ strategy.localRangeHoleCutting.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs);
+ SelectStrategyIfBetter(strategy.localRangeHoleCutting);
+ // evaluate strategy: Split current range to fit in available holes
+ // todo - are checks required to avoid splitting on the suffix instruction?
+ strategy.availableRegisterHole.Evaluate(imlSegment, subrangeItr, livenessTimeline, localAvailableRegsMask, allowedRegs);
+ SelectStrategyIfBetter(strategy.availableRegisterHole);
+ // evaluate strategy: Explode inter-segment ranges
+ strategy.explodeRange.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs);
+ SelectStrategyIfBetter(strategy.explodeRange);
+ }
+ else // if subrangeItr->interval.ExtendsIntoNextSegment()
+ {
+ // range continues in following segment(s), only the inter-segment explode strategy is evaluated
+ strategy.explodeRangeInter.Reset();
+ strategy.explodeRangeInter.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs);
+ SelectStrategyIfBetter(strategy.explodeRangeInter);
+ }
+ // choose strategy
+ if (selectedStrategy)
+ {
+ selectedStrategy->Apply(ppcImlGenContext, imlSegment, subrangeItr);
+ }
+ else
+ {
+ // none of the evaluated strategies can be applied, this should only happen if the range extends beyond this segment, for which we have no good strategy
+ // NOTE(review): the assert checks ExtendsPreviousSegment() while the original comment talks about the next segment(s) - confirm which one is intended
+ cemu_assert_debug(subrangeItr->interval.ExtendsPreviousSegment());
+ // alternative strategy if we have no other choice: explode current range
+ IMLRA_ExplodeRangeCluster(ppcImlGenContext, subrangeItr);
+ }
+ // ranges were modified, report "not done" so the caller runs the assignment again
+ return false;
+ }
+ return true;
+}
+
+// Assigns physical registers to all liveness ranges.
+// Fixed register requirements are handled first. Afterwards segments are processed from
+// the deepest loop nesting level outwards. Whenever IMLRA_AssignSegmentRegisters returns
+// false, the whole pass is restarted; the function returns once a full pass completes
+// with every visited segment reporting success.
+void IMLRA_AssignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext)
+{
+	// start with frequently executed segments first
+	sint32 maxLoopDepth = 0;
+	for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
+		maxLoopDepth = std::max(maxLoopDepth, segIt->loopDepth);
+	// assign fixed registers first
+	for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
+		IMLRA_HandleFixedRegisters(ppcImlGenContext, segIt);
+#if DEBUG_RA_EXTRA_VALIDATION
+	// fixed registers are currently handled per-segment, but here we validate that they are assigned correctly on a global scope as well
+	for (IMLSegment* imlSegment : ppcImlGenContext->segmentList2)
+	{
+		for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
+		{
+			IMLPhysRegisterSet allowedRegs;
+			if (!currentRange->GetAllowedRegistersEx(allowedRegs))
+			{
+				cemu_assert_debug(currentRange->list_fixedRegRequirements.empty());
+				continue;
+			}
+			cemu_assert_debug(currentRange->HasPhysicalRegister() && allowedRegs.IsAvailable(currentRange->GetPhysicalRegister()));
+		}
+	}
+#endif
+
+	while (true)
+	{
+		// Starts out true so that a pass which visits no segment at all (e.g. an empty
+		// segment list) terminates instead of looping forever.
+		bool allSegmentsAssigned = true;
+		for (sint32 d = maxLoopDepth; d >= 0 && allSegmentsAssigned; d--)
+		{
+			for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
+			{
+				if (segIt->loopDepth != d)
+					continue;
+				if (!IMLRA_AssignSegmentRegisters(ctx, ppcImlGenContext, segIt))
+				{
+					// ranges were modified, start over with a fresh pass
+					allSegmentsAssigned = false;
+					break;
+				}
+			}
+		}
+		if (allSegmentsAssigned)
+			break;
+	}
+}
+
+// Prepares the segment graph for register allocation:
+// 1) inserts an empty segment on the not-taken edge of a branch when the target of that
+//    edge has more than one predecessor and is not enterable; this gives the register
+//    allocator more room to create efficient spill code
+// 2) renumbers momentaryIndex and runs loop detection on every segment
+void IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext)
+{
+	for (size_t segmentIndex = 0; segmentIndex < ppcImlGenContext->segmentList2.size(); segmentIndex++)
+	{
+		IMLSegment* imlSegment = ppcImlGenContext->segmentList2[segmentIndex];
+		// only segments with a certain two-way branch whose fall-through target is shared
+		// with other predecessors and is not enterable get a padding segment
+		const bool needsPaddingSegment =
+			!imlSegment->nextSegmentIsUncertain &&
+			imlSegment->nextSegmentBranchTaken != nullptr &&
+			imlSegment->nextSegmentBranchNotTaken != nullptr &&
+			imlSegment->nextSegmentBranchNotTaken->list_prevSegments.size() > 1 &&
+			!imlSegment->nextSegmentBranchNotTaken->isEnterable;
+		if (!needsPaddingSegment)
+			continue;
+		PPCRecompilerIml_insertSegments(ppcImlGenContext, segmentIndex + 1, 1);
+		IMLSegment* imlSegmentP0 = ppcImlGenContext->segmentList2[segmentIndex + 0];
+		IMLSegment* imlSegmentP1 = ppcImlGenContext->segmentList2[segmentIndex + 1];
+		IMLSegment* nextSegment = imlSegment->nextSegmentBranchNotTaken;
+		// reroute: P0 -> nextSegment becomes P0 -> P1 -> nextSegment
+		IMLSegment_RemoveLink(imlSegmentP0, nextSegment);
+		IMLSegment_SetLinkBranchNotTaken(imlSegmentP1, nextSegment);
+		IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1);
+	}
+	// detect loops
+	// segments were inserted above, so the indices have to be refreshed first
+	for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++)
+		ppcImlGenContext->segmentList2[s]->momentaryIndex = s;
+	for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++)
+		IMLRA_IdentifyLoop(ppcImlGenContext, ppcImlGenContext->segmentList2[s]);
+}
+
+// Looks up the abstract liveness range of regId within imlSegment.
+// Returns nullptr if the segment has no entry for this register.
+IMLRARegAbstractLiveness* _GetAbstractRange(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId)
+{
+	auto& abstractRanges = ctx.GetSegmentAbstractRangeMap(imlSegment);
+	auto found = abstractRanges.find(regId);
+	if (found == abstractRanges.end())
+		return nullptr;
+	return &found->second;
+}
+
+// Scans all instructions of the segment and records, for every accessed register, the
+// abstract usage range within the segment. The base format of each register is stored
+// in ctx.regIdToBaseFormat when the register is first seen.
+void IMLRA_CalculateSegmentMinMaxAbstractRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
+{
+	IMLUsedRegisters usedRegs;
+	auto& abstractRanges = ctx.GetSegmentAbstractRangeMap(imlSegment);
+	for (size_t instructionIndex = 0; instructionIndex < imlSegment->imlList.size(); instructionIndex++)
+	{
+		imlSegment->imlList[instructionIndex].CheckRegisterUsage(&usedRegs);
+		usedRegs.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) {
+			const IMLRegID gprId = gprReg.GetRegID();
+			auto existing = abstractRanges.find(gprId);
+			if (existing != abstractRanges.end())
+			{
+				// register was seen before in this segment, extend its range
+				existing->second.TrackInstruction(instructionIndex);
+#ifdef CEMU_DEBUG_ASSERT
+				cemu_assert_debug(ctx.regIdToBaseFormat[gprId] == gprReg.GetBaseFormat()); // the base type per register always has to be the same
+#endif
+				return;
+			}
+			// first access in this segment: range covers just this instruction (end is exclusive)
+			abstractRanges.try_emplace(gprId, gprReg.GetBaseFormat(), (sint32)instructionIndex, (sint32)instructionIndex + 1);
+			ctx.regIdToBaseFormat.try_emplace(gprId, gprReg.GetBaseFormat());
+		});
+	}
+}
+
+// Calculates the per-segment abstract usage ranges (min/max index of usage) of every
+// register, for all segments.
+void IMLRA_CalculateLivenessRanges(IMLRegisterAllocatorContext& ctx)
+{
+	size_t expectedIndex = 0; // only used to verify that momentaryIndex matches the list position
+	for (IMLSegment* segment : ctx.deprGenContext->segmentList2)
+	{
+		cemu_assert_debug(segment->momentaryIndex == expectedIndex);
+		IMLRA_CalculateSegmentMinMaxAbstractRanges(ctx, segment);
+		++expectedIndex;
+	}
+}
+
+// Creates the liveness range object for register vGPR in imlSegment from its abstract
+// range and links it with the ranges of the same register in neighbouring segments.
+// The function recurses into following segments (when the abstract range reaches the end
+// of this segment and continues at the start of the next one) and into previous segments
+// (when the abstract range starts at the beginning of this segment).
+// Returns the range for this segment, the already existing range if the abstract range
+// was processed before, or nullptr if the segment has no abstract range for vGPR.
+raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID vGPR, IMLName name)
+{
+ IMLRARegAbstractLiveness* abstractRange = _GetAbstractRange(ctx, imlSegment, vGPR);
+ if (!abstractRange)
+ return nullptr;
+ if (abstractRange->isProcessed)
+ {
+ // return already existing segment
+ raLivenessRange* existingRange = IMLRA_GetSubrange(imlSegment, vGPR);
+ cemu_assert_debug(existingRange);
+ return existingRange;
+ }
+ // mark as processed before recursing so that cycles in the segment graph terminate
+ abstractRange->isProcessed = true;
+ // create subrange
+ cemu_assert_debug(IMLRA_GetSubrange(imlSegment, vGPR) == nullptr);
+ cemu_assert_debug(
+ (abstractRange->usageStart == abstractRange->usageEnd && (abstractRange->usageStart == RA_INTER_RANGE_START || abstractRange->usageStart == RA_INTER_RANGE_END)) ||
+ abstractRange->usageStart < abstractRange->usageEnd); // usageEnd is exclusive so it should always be larger
+ sint32 inclusiveEnd = abstractRange->usageEnd;
+ if (inclusiveEnd != RA_INTER_RANGE_START && inclusiveEnd != RA_INTER_RANGE_END)
+ inclusiveEnd--; // subtract one, because usageEnd is exclusive, but the end value of the interval passed to createSubrange is inclusive
+ raInterval interval;
+ interval.SetInterval(abstractRange->usageStart, true, inclusiveEnd, true);
+ raLivenessRange* subrange = IMLRA_CreateRange(ctx.deprGenContext, imlSegment, vGPR, name, interval.start, interval.end);
+ // traverse forward
+ if (abstractRange->usageEnd == RA_INTER_RANGE_END)
+ {
+ // link with the successor's range only if that range starts at the successor's beginning
+ if (imlSegment->nextSegmentBranchTaken)
+ {
+ IMLRARegAbstractLiveness* branchTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchTaken, vGPR);
+ if (branchTakenRange && branchTakenRange->usageStart == RA_INTER_RANGE_START)
+ {
+ subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchTaken, vGPR, name);
+ subrange->subrangeBranchTaken->previousRanges.push_back(subrange);
+ cemu_assert_debug(subrange->subrangeBranchTaken->interval.ExtendsPreviousSegment());
+ }
+ }
+ if (imlSegment->nextSegmentBranchNotTaken)
+ {
+ IMLRARegAbstractLiveness* branchNotTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR);
+ if (branchNotTakenRange && branchNotTakenRange->usageStart == RA_INTER_RANGE_START)
+ {
+ subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR, name);
+ subrange->subrangeBranchNotTaken->previousRanges.push_back(subrange);
+ cemu_assert_debug(subrange->subrangeBranchNotTaken->interval.ExtendsPreviousSegment());
+ }
+ }
+ }
+ // traverse backward
+ if (abstractRange->usageStart == RA_INTER_RANGE_START)
+ {
+ // make sure ranges exist in all predecessors whose abstract range reaches their end;
+ // the return value is ignored because the forward traversal of that call does the linking
+ for (auto& it : imlSegment->list_prevSegments)
+ {
+ IMLRARegAbstractLiveness* prevRange = _GetAbstractRange(ctx, it, vGPR);
+ if (!prevRange)
+ continue;
+ if (prevRange->usageEnd == RA_INTER_RANGE_END)
+ PPCRecRA_convertToMappedRanges(ctx, it, vGPR, name);
+ }
+ }
+ return subrange;
+}
+
+// Appends pos to the access locations of subrange unless it equals the most recently
+// added location. Positions are expected to arrive in ascending order.
+void IMLRA_UpdateOrAddSubrangeLocation(raLivenessRange* subrange, raInstructionEdge pos)
+{
+	auto& accessLocations = subrange->list_accessLocations;
+	if (!accessLocations.empty())
+	{
+		if (accessLocations.back().pos == pos)
+			return; // already recorded
+		cemu_assert_debug(accessLocations.back().pos < pos);
+	}
+	accessLocations.emplace_back(pos);
+}
+
+// Takes the abstract range data of a segment and creates the liveness range objects.
+// Afterwards every instruction of the segment is scanned to record, per range, the
+// read/write access locations and the fixed register requirements.
+void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
+{
+	// register id -> range of this segment; fetched before the ranges are created and
+	// only queried afterwards
+	const auto& regToSubrange = IMLRA_GetSubrangeMap(imlSegment);
+
+	// every register accessed by an instruction of this segment must have a range by the time it is looked up
+	auto GetSubrangeForReg = [&](IMLRegID regId) -> raLivenessRange* {
+		auto it = regToSubrange.find(regId);
+		cemu_assert_debug(it != regToSubrange.end());
+		return it->second;
+	};
+
+	// records a fixed register requirement unless one already exists at the same position
+	auto AddOrUpdateFixedRegRequirement = [&](IMLRegID regId, sint32 instructionIndex, bool isInput, const IMLPhysRegisterSet& physRegSet) {
+		raLivenessRange* subrange = GetSubrangeForReg(regId);
+		cemu_assert_debug(subrange);
+		raFixedRegRequirement tmp;
+		tmp.pos.Set(instructionIndex, isInput);
+		tmp.allowedReg = physRegSet;
+		if (subrange->list_fixedRegRequirements.empty() || subrange->list_fixedRegRequirements.back().pos != tmp.pos)
+			subrange->list_fixedRegRequirements.push_back(tmp);
+	};
+
+	// convert abstract min-max ranges to liveness range objects
+	auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
+	for (auto& it : segMap)
+	{
+		if (it.second.isProcessed)
+			continue;
+		IMLRegID regId = it.first;
+		auto nameIt = ctx.raParam->regIdToName.find(regId);
+		cemu_assert_debug(nameIt != ctx.raParam->regIdToName.end());
+		PPCRecRA_convertToMappedRanges(ctx, imlSegment, regId, nameIt->second);
+	}
+	// fill created ranges with read/write location indices
+	// note that at this point there is only one range per register per segment
+	// and the algorithm below relies on this
+	IMLUsedRegisters gprTracking;
+	for (size_t index = 0; index < imlSegment->imlList.size(); index++)
+	{
+		imlSegment->imlList[index].CheckRegisterUsage(&gprTracking);
+		// reads happen on the input edge of the instruction
+		raInstructionEdge pos((sint32)index, true);
+		gprTracking.ForEachReadGPR([&](IMLReg gprReg) {
+			IMLRA_UpdateOrAddSubrangeLocation(GetSubrangeForReg(gprReg.GetRegID()), pos);
+		});
+		// writes happen on the output edge of the instruction
+		pos = {(sint32)index, false};
+		gprTracking.ForEachWrittenGPR([&](IMLReg gprReg) {
+			IMLRA_UpdateOrAddSubrangeLocation(GetSubrangeForReg(gprReg.GetRegID()), pos);
+		});
+		// check fixed register requirements
+		IMLFixedRegisters fixedRegs;
+		GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs);
+		for (auto& fixedRegAccess : fixedRegs.listInput)
+		{
+			if (fixedRegAccess.reg != IMLREG_INVALID)
+				AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), (sint32)index, true, fixedRegAccess.physRegSet);
+		}
+		for (auto& fixedRegAccess : fixedRegs.listOutput)
+		{
+			if (fixedRegAccess.reg != IMLREG_INVALID)
+				AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), (sint32)index, false, fixedRegAccess.physRegSet);
+		}
+	}
+}
+
+// Makes the abstract range of regId reach the end of imlSegment. If the segment has no
+// range for the register yet, a new one is created which starts at the suffix
+// instruction (when the segment has one) or otherwise directly at the segment end.
+void IMLRA_extendAbstractRangeToEndOfSegment(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId)
+{
+	auto& abstractRanges = ctx.GetSegmentAbstractRangeMap(imlSegment);
+	auto existing = abstractRanges.find(regId);
+	if (existing != abstractRanges.end())
+	{
+		existing->second.usageEnd = RA_INTER_RANGE_END;
+		return;
+	}
+	const sint32 startIndex = imlSegment->HasSuffixInstruction() ? (sint32)imlSegment->GetSuffixInstructionIndex() : (sint32)RA_INTER_RANGE_END;
+	abstractRanges.try_emplace((IMLRegID)regId, IMLRegFormat::INVALID_FORMAT, startIndex, RA_INTER_RANGE_END);
+}
+
+// Makes the abstract range of regId start at the beginning of imlSegment (creating an
+// empty range at the segment start if none exists) and extends the range of every
+// predecessor segment to its end.
+void IMLRA_extendAbstractRangeToBeginningOfSegment(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId)
+{
+	auto& abstractRanges = ctx.GetSegmentAbstractRangeMap(imlSegment);
+	auto existing = abstractRanges.find(regId);
+	if (existing != abstractRanges.end())
+		existing->second.usageStart = RA_INTER_RANGE_START;
+	else
+		abstractRanges.try_emplace((IMLRegID)regId, IMLRegFormat::INVALID_FORMAT, RA_INTER_RANGE_START, RA_INTER_RANGE_START);
+	// propagate backwards
+	for (auto& prevSegment : imlSegment->list_prevSegments)
+		IMLRA_extendAbstractRangeToEndOfSegment(ctx, prevSegment, regId);
+}
+
+// Connects the abstract ranges of regId along a route of segments.
+// route[0] is extended to its end, route[routeDepth - 1] to its beginning and every
+// segment in between in both directions. The route must contain at least two segments.
+void IMLRA_connectAbstractRanges(IMLRegisterAllocatorContext& ctx, IMLRegID regId, IMLSegment** route, sint32 routeDepth)
+{
+#ifdef CEMU_DEBUG_ASSERT
+	if (routeDepth < 2)
+		assert_dbg();
+#endif
+	const sint32 lastRouteIndex = routeDepth - 1;
+	// first segment: only towards the end
+	IMLRA_extendAbstractRangeToEndOfSegment(ctx, route[0], regId);
+	// intermediate segments: the range passes through completely
+	for (sint32 hop = 1; hop < lastRouteIndex; hop++)
+	{
+		IMLSegment* passThroughSegment = route[hop];
+		IMLRA_extendAbstractRangeToEndOfSegment(ctx, passThroughSegment, regId);
+		IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, passThroughSegment, regId);
+	}
+	// final segment: only towards the beginning
+	IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, route[lastRouteIndex], regId);
+}
+
+// Recursive helper of PPCRecRA_checkAndTryExtendRange.
+// Follows the control flow starting at currentSegment looking for another range of regID
+// within distanceLeft instructions. The visited segments are recorded in route (which
+// holds up to 64 entries); when a close enough range is found, all ranges along the
+// route are connected.
+void _IMLRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegment* currentSegment, IMLRegID regID, sint32 distanceLeft, IMLSegment** route, sint32 routeDepth)
+{
+	if (routeDepth >= 64)
+	{
+		cemuLog_logDebug(LogType::Force, "Recompiler RA route maximum depth exceeded\n");
+		return;
+	}
+	route[routeDepth] = currentSegment;
+
+	const sint32 segmentLength = (sint32)currentSegment->imlList.size();
+	IMLRARegAbstractLiveness* range = _GetAbstractRange(ctx, currentSegment, regID);
+	if (range)
+	{
+		// the register is used in this segment, measure the distance to that usage
+		bool isWithinReach;
+		if (range->usageStart == RA_INTER_RANGE_END)
+			isWithinReach = distanceLeft >= segmentLength; // range only exists at the very end of the segment
+		else
+			isWithinReach = range->usageStart == RA_INTER_RANGE_START || range->usageStart <= distanceLeft;
+		// found close range -> connect ranges
+		if (isWithinReach)
+			IMLRA_connectAbstractRanges(ctx, regID, route, routeDepth + 1);
+		return;
+	}
+	// register is not used in this segment: the whole segment counts towards the distance
+	distanceLeft -= segmentLength;
+	if (distanceLeft <= 0)
+		return;
+	if (currentSegment->nextSegmentBranchNotTaken)
+		_IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchNotTaken, regID, distanceLeft, route, routeDepth + 1);
+	if (currentSegment->nextSegmentBranchTaken)
+		_IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchTaken, regID, distanceLeft, route, routeDepth + 1);
+}
+
+// Tries to connect the given abstract range of regID with a nearby range of the same
+// register in one of the following segments. The search covers a total of 45
+// instructions, counted from the end of the range.
+void PPCRecRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegment* currentSegment, IMLRARegAbstractLiveness* range, IMLRegID regID)
+{
+	cemu_assert_debug(range->usageEnd >= 0);
+	// count instructions to end of initial segment
+	const sint32 instructionsUntilEndOfSeg = (range->usageEnd == RA_INTER_RANGE_END) ? 0 : ((sint32)currentSegment->imlList.size() - range->usageEnd);
+	cemu_assert_debug(instructionsUntilEndOfSeg >= 0);
+	const sint32 remainingScanDist = 45 - instructionsUntilEndOfSeg;
+	if (remainingScanDist <= 0)
+		return; // can't reach end
+
+	// the route starts at the current segment, successors are appended by the recursive helper
+	IMLSegment* route[64];
+	route[0] = currentSegment;
+	IMLSegment* const notTakenSegment = currentSegment->nextSegmentBranchNotTaken;
+	if (notTakenSegment)
+		_IMLRA_checkAndTryExtendRange(ctx, notTakenSegment, regID, remainingScanDist, route, 1);
+	IMLSegment* const takenSegment = currentSegment->nextSegmentBranchTaken;
+	if (takenSegment)
+		_IMLRA_checkAndTryExtendRange(ctx, takenSegment, regID, remainingScanDist, route, 1);
+}
+
+// Tries to extend every abstract range of the segment towards close ranges of the same
+// register in following segments.
+void PPCRecRA_mergeCloseRangesForSegmentV2(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
+{
+	for (auto& [regId, abstractRange] : ctx.GetSegmentAbstractRangeMap(imlSegment))
+		PPCRecRA_checkAndTryExtendRange(ctx, imlSegment, &abstractRange, regId);
+#ifdef CEMU_DEBUG_ASSERT
+	// an enterable segment must not have predecessors
+	if (!imlSegment->list_prevSegments.empty() && imlSegment->isEnterable)
+		assert_dbg();
+	// a segment with an uncertain successor must not have linked successors
+	if ((imlSegment->nextSegmentBranchNotTaken != nullptr || imlSegment->nextSegmentBranchTaken != nullptr) && imlSegment->nextSegmentIsUncertain)
+		assert_dbg();
+#endif
+}
+
+// Visits every segment reachable from imlSegment exactly once (in breadth-first order
+// along the not-taken/taken links) and merges close abstract ranges for each of them.
+void PPCRecRA_followFlowAndExtendRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
+{
+	const size_t segmentCount = ctx.deprGenContext->segmentList2.size();
+	std::vector<IMLSegment*> list_segments;                       // work list, also holds the already visited segments
+	std::vector<bool> list_processedSegment(segmentCount, false); // indexed by IMLSegment::momentaryIndex
+	list_segments.reserve(segmentCount + 1);
+
+	// marks a segment as seen and appends it to the work list
+	auto enqueueSegment = [&](IMLSegment* seg) {
+		list_processedSegment[seg->momentaryIndex] = true;
+		list_segments.push_back(seg);
+	};
+	auto isSegProcessed = [&list_processedSegment](IMLSegment* seg) -> bool {
+		return list_processedSegment[seg->momentaryIndex];
+	};
+	enqueueSegment(imlSegment);
+
+	// list_segments grows while iterating, so an index is used instead of iterators
+	for (size_t index = 0; index < list_segments.size(); index++)
+	{
+		IMLSegment* currentSegment = list_segments[index];
+		PPCRecRA_mergeCloseRangesForSegmentV2(ctx, currentSegment);
+		// follow flow
+		if (currentSegment->nextSegmentBranchNotTaken && !isSegProcessed(currentSegment->nextSegmentBranchNotTaken))
+			enqueueSegment(currentSegment->nextSegmentBranchNotTaken);
+		if (currentSegment->nextSegmentBranchTaken && !isSegProcessed(currentSegment->nextSegmentBranchTaken))
+			enqueueSegment(currentSegment->nextSegmentBranchTaken);
+	}
+}
+
+// Starts a flow walk (PPCRecRA_followFlowAndExtendRanges) at every segment that has no
+// predecessor, i.e. at every entry or standalone segment.
+void IMLRA_MergeCloseAbstractRanges(IMLRegisterAllocatorContext& ctx)
+{
+	auto& segments = ctx.deprGenContext->segmentList2;
+	for (size_t idx = 0; idx < segments.size(); idx++)
+	{
+		IMLSegment* seg = segments[idx];
+		if (seg->list_prevSegments.empty())
+			PPCRecRA_followFlowAndExtendRanges(ctx, seg);
+	}
+}
+
+// For every segment inside a loop that has at least one successor with a lower loop depth
+// (a loop exit): every abstract range of that segment which ends on RA_INTER_RANGE_END is
+// extended to the beginning of all successors of the segment.
+// This allows the data flow analyzer to move stores out of the loop.
+void IMLRA_ExtendAbstractRangesOutOfLoops(IMLRegisterAllocatorContext& ctx)
+{
+	auto& segments = ctx.deprGenContext->segmentList2;
+	for (size_t idx = 0; idx < segments.size(); idx++)
+	{
+		IMLSegment* seg = segments[idx];
+		auto depth = seg->loopDepth;
+		if (depth <= 0)
+			continue; // segment is not part of a loop
+		// does either successor leave the loop?
+		const bool exitsViaTaken = seg->nextSegmentBranchTaken && seg->nextSegmentBranchTaken->loopDepth < depth;
+		const bool exitsViaNotTaken = seg->nextSegmentBranchNotTaken && seg->nextSegmentBranchNotTaken->loopDepth < depth;
+		if (!exitsViaTaken && !exitsViaNotTaken)
+			continue;
+
+		for (auto& [regId, abstractRange] : ctx.GetSegmentAbstractRangeMap(seg))
+		{
+			// only ranges which reach the end of the segment are carried over
+			if (abstractRange.usageEnd != RA_INTER_RANGE_END)
+				continue;
+			if (seg->nextSegmentBranchTaken)
+				IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, seg->nextSegmentBranchTaken, regId);
+			if (seg->nextSegmentBranchNotTaken)
+				IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, seg->nextSegmentBranchNotTaken, regId);
+		}
+	}
+}
+
+// Runs the two abstract-range passes and afterwards converts the abstract ranges of every
+// segment into liveness ranges.
+void IMLRA_ProcessFlowAndCalculateLivenessRanges(IMLRegisterAllocatorContext& ctx)
+{
+	// pass 1: connect ranges that are close to each other
+	IMLRA_MergeCloseAbstractRanges(ctx);
+	// pass 2: extra pass to move register loads and stores out of loops
+	IMLRA_ExtendAbstractRangesOutOfLoops(ctx);
+	// pass 3: abstract ranges -> liveness ranges, one segment at a time
+	auto& allSegments = ctx.deprGenContext->segmentList2;
+	for (auto& segment : allSegments)
+	{
+		IMLRA_ConvertAbstractToLivenessRanges(ctx, segment);
+	}
+}
+
+// Derives the load/store flags of a single range from its access locations:
+//  hasStore - set if any access location writes
+//  _noLoad  - set if a write is seen before any read, or if the range extends the previous segment
+void IMLRA_AnalyzeSubrangeDataDependency(raLivenessRange* subrange)
+{
+	bool sawRead = false;
+	bool sawWrite = false;
+	bool writtenBeforeAnyRead = false;
+	for (auto& access : subrange->list_accessLocations)
+	{
+		// the read check comes first, so a location that reads and writes does not count as an overwrite
+		if (access.IsRead())
+			sawRead = true;
+		if (!access.IsWrite())
+			continue;
+		sawWrite = true;
+		if (!sawRead)
+			writtenBeforeAnyRead = true;
+	}
+	const bool continuesFromPreviousSegment = subrange->interval.ExtendsPreviousSegment();
+	subrange->hasStore = sawWrite;
+	subrange->_noLoad = writtenBeforeAnyRead || continuesFromPreviousSegment;
+}
+
+struct subrangeEndingInfo_t // result accumulator filled by _findSubrangeWriteEndings
+{
+ raLivenessRange* subrangeList[SUBRANGE_LIST_SIZE]; // collected ranges which do not extend into a next segment
+ sint32 subrangeCount; // number of valid entries in subrangeList
+
+ bool hasUndefinedEndings; // set when not all endings could be collected (depth limit hit, list full or a successor range missing)
+};
+
+// Recursively follows subrange along the branch-taken / branch-not-taken range links and collects
+// every range that does not extend into a next segment in info->subrangeList.
+// info->hasUndefinedEndings is set when the result is incomplete: recursion depth of 30 reached,
+// list full, or a segment has a successor for which the range has no linked successor range.
+// iterationIndex is stored in lastIterationIndex of each visited range so it is visited only once.
+void _findSubrangeWriteEndings(raLivenessRange* subrange, uint32 iterationIndex, sint32 depth, subrangeEndingInfo_t* info)
+{
+	if (depth >= 30)
+	{
+		info->hasUndefinedEndings = true;
+		return;
+	}
+	if (subrange->lastIterationIndex == iterationIndex)
+		return; // visited earlier during this walk
+	subrange->lastIterationIndex = iterationIndex;
+	if (subrange->hasStoreDelayed)
+		return; // ranges with a delayed store are not traversed
+
+	if (!subrange->interval.ExtendsIntoNextSegment())
+	{
+		// this range is an ending: record it, or flag the result as incomplete if the list is full
+		if (info->subrangeCount < SUBRANGE_LIST_SIZE)
+			info->subrangeList[info->subrangeCount++] = subrange;
+		else
+			info->hasUndefinedEndings = true;
+		return;
+	}
+
+	// continue along both outgoing edges of the segment
+	auto visitSuccessor = [&](IMLSegment* nextSegment, raLivenessRange* nextRange) {
+		if (!nextSegment)
+			return;
+		if (nextRange)
+			_findSubrangeWriteEndings(nextRange, iterationIndex, depth + 1, info);
+		else
+			info->hasUndefinedEndings = true;
+	};
+	IMLSegment* owner = subrange->imlSegment;
+	visitSuccessor(owner->nextSegmentBranchNotTaken, subrange->subrangeBranchNotTaken);
+	visitSuccessor(owner->nextSegmentBranchTaken, subrange->subrangeBranchTaken);
+}
+
+// Decides for one range whether its store can be delayed into the endings found by
+// _findSubrangeWriteEndings. Only ranges which extend into a next segment and have hasStore set
+// are considered. The store is delayed when every ending already stores, or when the highest
+// read/write cost among the endings without a store does not exceed the cost of this range's segment.
+static void IMLRA_AnalyzeRangeDataFlow(raLivenessRange* subrange)
+{
+	if (!subrange->interval.ExtendsIntoNextSegment() || !subrange->hasStore)
+		return;
+	subrangeEndingInfo_t endings;
+	endings.subrangeCount = 0;
+	endings.hasUndefinedEndings = false;
+	_findSubrangeWriteEndings(subrange, IMLRA_GetNextIterationIndex(), 0, &endings);
+	if (endings.hasUndefinedEndings)
+		return; // endings are not fully known, keep the store where it is
+
+	// highest cost among the endings which would need an additional store
+	sint32 highestMissingStoreCost = 0;
+	bool someEndingLacksStore = false;
+	for (sint32 idx = 0; idx < endings.subrangeCount; idx++)
+	{
+		raLivenessRange* ending = endings.subrangeList[idx];
+		if (!ending->hasStore)
+		{
+			someEndingLacksStore = true;
+			sint32 endingCost = IMLRA_GetSegmentReadWriteCost(ending->imlSegment);
+			highestMissingStoreCost = std::max(endingCost, highestMissingStoreCost);
+		}
+	}
+	if (someEndingLacksStore && highestMissingStoreCost > IMLRA_GetSegmentReadWriteCost(subrange->imlSegment))
+		return; // delaying would cost more than storing here
+
+	subrange->hasStore = false;
+	subrange->hasStoreDelayed = true;
+	if (someEndingLacksStore)
+	{
+		// the endings take over the store
+		for (sint32 idx = 0; idx < endings.subrangeCount; idx++)
+			endings.subrangeList[idx]->hasStore = true;
+	}
+}
+
+// Two passes over all ranges of all segments:
+//  1) compute the per-range read/write flags (IMLRA_AnalyzeSubrangeDataDependency)
+//  2) propagate store information across segment boundaries (per-range IMLRA_AnalyzeRangeDataFlow)
+// this function is called after _AssignRegisters(), which means that all liveness ranges are already final and must not be modified anymore
+void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext)
+{
+	for (auto& segment : ppcImlGenContext->segmentList2)
+	{
+		for (raLivenessRange* range = segment->raInfo.linkedList_allSubranges; range; range = range->link_allSegmentRanges.next)
+			IMLRA_AnalyzeSubrangeDataDependency(range);
+	}
+	for (auto& segment : ppcImlGenContext->segmentList2)
+	{
+		for (raLivenessRange* range = segment->raInfo.linkedList_allSubranges; range; range = range->link_allSegmentRanges.next)
+			IMLRA_AnalyzeRangeDataFlow(range);
+	}
+}
+
+/* Generate move instructions */
+
+inline IMLReg _MakeNativeReg(IMLRegFormat baseFormat, IMLRegID regId) // builds an IMLReg for regId, passing baseFormat for both format arguments and 0 for the third constructor argument
+{
+ return IMLReg(baseFormat, baseFormat, 0, regId);
+}
+
+// prepass for IMLRA_GenerateSegmentMoveInstructions2 which updates all virtual registers to their physical counterparts: each instruction is rewritten via RewriteGPR using the mapping of the ranges active at that instruction
+void IMLRA_RewriteRegisters(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) // ctx is not used by this function
+{
+ std::unordered_map virtId2PhysReg; // virtual register -> physical register of the ranges which are currently active
+ boost::container::small_vector activeRanges; // ranges which have started but not yet ended at the current instruction
+ raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; // next range that has not been activated yet; presumably the list is ordered by interval.start - TODO confirm
+ raInstructionEdge currentEdge;
+ for (size_t i = 0; i < imlSegment->imlList.size(); i++)
+ {
+ currentEdge.Set(i, false); // set to instruction index on output edge
+ // activate ranges which begin before or during this instruction
+ while (currentRange && currentRange->interval.start <= currentEdge)
+ {
+ cemu_assert_debug(virtId2PhysReg.find(currentRange->GetVirtualRegister()) == virtId2PhysReg.end() || virtId2PhysReg[currentRange->GetVirtualRegister()] == currentRange->GetPhysicalRegister()); // check for register conflict
+
+ virtId2PhysReg[currentRange->GetVirtualRegister()] = currentRange->GetPhysicalRegister();
+ activeRanges.push_back(currentRange);
+ currentRange = currentRange->link_allSegmentRanges.next;
+ }
+ // rewrite registers
+ imlSegment->imlList[i].RewriteGPR(virtId2PhysReg);
+ // deactivate ranges which end during this instruction
+ auto it = activeRanges.begin();
+ while (it != activeRanges.end())
+ {
+ if ((*it)->interval.end <= currentEdge)
+ {
+ virtId2PhysReg.erase((*it)->GetVirtualRegister()); // the mapping is dropped together with the range
+ it = activeRanges.erase(it); // erase() returns the iterator to the following element
+ }
+ else
+ ++it;
+ }
+ }
+}
+
+void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) // rebuilds imlSegment->imlList: registers are rewritten first, then every instruction is re-emitted with make_r_name loads where ranges start (unless _noLoad) and make_name_r stores where ranges with hasStore end
+{
+ IMLRA_RewriteRegisters(ctx, imlSegment); // must run before instructions are copied into the rebuilt list
+
+#if DEBUG_RA_INSTRUCTION_GEN
+ cemuLog_log(LogType::Force, "");
+ cemuLog_log(LogType::Force, "[Seg before RA]");
+ IMLDebug_DumpSegment(nullptr, imlSegment, true);
+#endif
+
+ bool hadSuffixInstruction = imlSegment->HasSuffixInstruction(); // remembered so the asserts further down can compare against the state after rebuilding
+
+ std::vector rebuiltInstructions; // output list, moved into imlSegment->imlList at the end
+ sint32 numInstructionsWithoutSuffix = (sint32)imlSegment->imlList.size() - (imlSegment->HasSuffixInstruction() ? 1 : 0); // equals the index of the suffix instruction when there is one
+
+ if (imlSegment->imlList.empty())
+ {
+ // empty segments need special handling (todo - look into merging this with the core logic below eventually)
+ // store all ranges
+ raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges;
+ while (currentRange)
+ {
+ if (currentRange->hasStore)
+ rebuiltInstructions.emplace_back().make_name_r(currentRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()));
+ currentRange = currentRange->link_allSegmentRanges.next;
+ }
+ // load ranges
+ currentRange = imlSegment->raInfo.linkedList_allSubranges;
+ while (currentRange)
+ {
+ if (!currentRange->_noLoad)
+ {
+ cemu_assert_debug(currentRange->interval.ExtendsIntoNextSegment());
+ rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName());
+ }
+ currentRange = currentRange->link_allSegmentRanges.next;
+ }
+ imlSegment->imlList = std::move(rebuiltInstructions);
+ return;
+ }
+
+ // make sure that no range exceeds the suffix instruction input edge except if they need to be loaded for the next segment (todo - for those, set the start point accordingly?)
+ {
+ raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges;
+ raInstructionEdge edge; // clamp position: input edge of the suffix instruction, or output edge of the last instruction if there is no suffix
+ if (imlSegment->HasSuffixInstruction())
+ edge.Set(numInstructionsWithoutSuffix, true);
+ else
+ edge.Set(numInstructionsWithoutSuffix - 1, false);
+
+ while (currentRange)
+ {
+ if (!currentRange->interval.IsNextSegmentOnly() && currentRange->interval.end > edge)
+ {
+ currentRange->interval.SetEnd(edge);
+ }
+ currentRange = currentRange->link_allSegmentRanges.next;
+ }
+ }
+
+#if DEBUG_RA_INSTRUCTION_GEN
+ cemuLog_log(LogType::Force, "");
+ cemuLog_log(LogType::Force, "--- Intermediate liveness info ---");
+ {
+ raLivenessRange* dbgRange = imlSegment->raInfo.linkedList_allSubranges;
+ while (dbgRange)
+ {
+ cemuLog_log(LogType::Force, "Range i{}: {}-{}", dbgRange->GetVirtualRegister(), dbgRange->interval2.start.GetDebugString(), dbgRange->interval2.end.GetDebugString()); // NOTE(review): uses interval2 while the rest of this function uses interval - confirm this still compiles when DEBUG_RA_INSTRUCTION_GEN is enabled
+ dbgRange = dbgRange->link_allSegmentRanges.next;
+ }
+ }
+#endif
+
+ boost::container::small_vector activeRanges; // ranges which have started but whose end has not been emitted yet
+ // first we add all the ranges that extend from the previous segment, some of these will end immediately at the first instruction so we might need to store them early
+ raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges;
+ // make all ranges active that start on RA_INTER_RANGE_START
+ while (currentRange && currentRange->interval.start.ConnectsToPreviousSegment())
+ {
+ activeRanges.push_back(currentRange); // no load is emitted for these ranges here
+ currentRange = currentRange->link_allSegmentRanges.next;
+ }
+ // store all ranges that end before the first output edge (includes RA_INTER_RANGE_START)
+ auto it = activeRanges.begin();
+ raInstructionEdge firstOutputEdge;
+ firstOutputEdge.Set(0, false);
+ while (it != activeRanges.end())
+ {
+ if ((*it)->interval.end < firstOutputEdge)
+ {
+ raLivenessRange* storedRange = *it;
+ if (storedRange->hasStore)
+ rebuiltInstructions.emplace_back().make_name_r(storedRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[storedRange->GetVirtualRegister()], storedRange->GetPhysicalRegister()));
+ it = activeRanges.erase(it);
+ continue;
+ }
+ ++it;
+ }
+
+ sint32 numInstructions = (sint32)imlSegment->imlList.size();
+ for (sint32 i = 0; i < numInstructions; i++)
+ {
+ raInstructionEdge curEdge;
+ // input edge
+ curEdge.SetRaw(i * 2 + 1); // +1 to include ranges that start at the output of the instruction
+ while (currentRange && currentRange->interval.start <= curEdge)
+ {
+ if (!currentRange->_noLoad)
+ {
+ rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName());
+ }
+ activeRanges.push_back(currentRange);
+ currentRange = currentRange->link_allSegmentRanges.next;
+ }
+ // copy instruction
+ rebuiltInstructions.push_back(imlSegment->imlList[i]);
+ // output edge
+ curEdge.SetRaw(i * 2 + 1 + 1);
+ // also store ranges that end on the next input edge, we handle this by adding an extra 1 above
+ auto it = activeRanges.begin();
+ while (it != activeRanges.end())
+ {
+ if ((*it)->interval.end <= curEdge)
+ {
+ // range expires
+ // todo - check hasStore
+ raLivenessRange* storedRange = *it;
+ if (storedRange->hasStore)
+ {
+ cemu_assert_debug(i != numInstructionsWithoutSuffix); // not allowed to emit after suffix
+ rebuiltInstructions.emplace_back().make_name_r(storedRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[storedRange->GetVirtualRegister()], storedRange->GetPhysicalRegister()));
+ }
+ it = activeRanges.erase(it);
+ continue;
+ }
+ ++it;
+ }
+ }
+ // if there is no suffix instruction we currently need to handle the final loads here
+ cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction());
+ if (imlSegment->HasSuffixInstruction())
+ {
+ if (currentRange)
+ {
+ cemuLog_logDebug(LogType::Force, "[DEBUG] GenerateSegmentMoveInstructions() hit suffix path with non-null currentRange. Segment: {:08x}", imlSegment->ppcAddress);
+ }
+ for (auto& remainingRange : activeRanges)
+ {
+ cemu_assert_debug(!remainingRange->hasStore); // a store would have to be emitted after the suffix instruction
+ }
+ }
+ else
+ {
+ for (auto& remainingRange : activeRanges)
+ {
+ cemu_assert_debug(!remainingRange->hasStore); // this range still needs to be stored
+ }
+ while (currentRange)
+ {
+ cemu_assert_debug(currentRange->interval.IsNextSegmentOnly());
+ cemu_assert_debug(!currentRange->_noLoad);
+ rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName());
+ currentRange = currentRange->link_allSegmentRanges.next;
+ }
+ }
+
+ imlSegment->imlList = std::move(rebuiltInstructions);
+ cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction()); // rebuilding must not add or drop the suffix instruction
+
+#if DEBUG_RA_INSTRUCTION_GEN
+ cemuLog_log(LogType::Force, "");
+ cemuLog_log(LogType::Force, "[Seg after RA]");
+ IMLDebug_DumpSegment(nullptr, imlSegment, false);
+#endif
+}
+
+// Runs IMLRA_GenerateSegmentMoveInstructions2 on every segment, in segment list order.
+void IMLRA_GenerateMoveInstructions(IMLRegisterAllocatorContext& ctx)
+{
+	auto& segments = ctx.deprGenContext->segmentList2;
+	for (size_t idx = 0; idx < segments.size(); idx++)
+		IMLRA_GenerateSegmentMoveInstructions2(ctx, segments[idx]);
+}
+
+static void DbgVerifyFixedRegRequirements(IMLSegment* imlSegment) // debug-only check (no-op unless DEBUG_RA_EXTRA_VALIDATION is set): every range covering a fixed register requirement must be compatible with it
+{
+#if DEBUG_RA_EXTRA_VALIDATION
+ std::vector frr = IMLRA_BuildSegmentInstructionFixedRegList(imlSegment);
+ for(auto& fixedReq : frr)
+ {
+ for (raLivenessRange* range = imlSegment->raInfo.linkedList_allSubranges; range; range = range->link_allSegmentRanges.next)
+ {
+ if (!range->interval2.ContainsEdge(fixedReq.pos)) // NOTE(review): interval2, whereas the non-debug code in this file uses interval - confirm the member name
+ continue;
+ // verify if the requirement is compatible
+ if(range->GetVirtualRegister() == fixedReq.regId)
+ {
+ cemu_assert(range->HasPhysicalRegister());
+ cemu_assert(fixedReq.allowedReg.IsAvailable(range->GetPhysicalRegister())); // virtual register matches, but not assigned the right physical register
+ }
+ else
+ {
+ cemu_assert(!fixedReq.allowedReg.IsAvailable(range->GetPhysicalRegister())); // virtual register does not match, but using the reserved physical register
+ }
+ }
+ }
+#endif
+}
+
+// Debug-only consistency check over all segments; compiles to an empty function unless
+// DEBUG_RA_EXTRA_VALIDATION is set.
+static void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx)
+{
+#if DEBUG_RA_EXTRA_VALIDATION
+	auto& segments = ctx.deprGenContext->segmentList2;
+	// pass 1: validate every range of every segment
+	for (size_t segIdx = 0; segIdx < segments.size(); segIdx++)
+	{
+		for (raLivenessRange* range = segments[segIdx]->raInfo.linkedList_allSubranges; range; range = range->link_allSegmentRanges.next)
+			PPCRecRA_debugValidateSubrange(range);
+	}
+	// pass 2: check that no range violates register requirements
+	for (size_t segIdx = 0; segIdx < segments.size(); segIdx++)
+		DbgVerifyFixedRegRequirements(segments[segIdx]);
+#endif
+}
+
+// Top-level driver of the register allocator. Runs all passes in order on the segments of
+// ppcImlGenContext, using the physical register pools supplied in raParam, and deletes all
+// liveness ranges again before returning.
+void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam)
+{
+	// state shared by the passes below
+	IMLRegisterAllocatorContext ctx;
+	ctx.deprGenContext = ppcImlGenContext;
+	ctx.raParam = &raParam;
+
+	// prepare the segments; the per-segment table is sized after the reshape step
+	IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext);
+	ppcImlGenContext->UpdateSegmentIndices(); // update momentaryIndex of each segment
+	ctx.perSegmentAbstractRanges.resize(ppcImlGenContext->segmentList2.size());
+
+	// build ranges and assign registers
+	IMLRA_CalculateLivenessRanges(ctx);
+	IMLRA_ProcessFlowAndCalculateLivenessRanges(ctx);
+	IMLRA_AssignRegisters(ctx, ppcImlGenContext);
+	DbgVerifyAllRanges(ctx);
+
+	// ranges are final from here on: analyze loads/stores and rebuild the instruction lists
+	IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext);
+	IMLRA_GenerateMoveInstructions(ctx);
+
+	// cleanup
+	IMLRA_DeleteAllRanges(ppcImlGenContext);
+}
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h
new file mode 100644
index 00000000..0a54e4cb
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h
@@ -0,0 +1,125 @@
+#pragma once
+
+// container for storing a set of register indices
+// specifically optimized towards storing typical range of physical register indices (expected to be below 64)
+class IMLPhysRegisterSet
+{
+public:
+	// adds register `index` to the set (index must be below 64)
+	void SetAvailable(uint32 index)
+	{
+		cemu_assert_debug(index < 64);
+		m_regBitmask |= ((uint64)1 << index);
+	}
+
+	// removes register `index` from the set (index must be below 64)
+	void SetReserved(uint32 index)
+	{
+		cemu_assert_debug(index < 64);
+		m_regBitmask &= ~((uint64)1 << index);
+	}
+
+	// marks all 64 register indices as available
+	void SetAllAvailable()
+	{
+		m_regBitmask = ~0ull;
+	}
+
+	bool HasAllAvailable() const
+	{
+		return m_regBitmask == ~0ull;
+	}
+
+	// returns true if register `index` is part of the set
+	// indices >= 64 can not be stored and are always reported as not available. The explicit check
+	// also keeps the shift in range when an out-of-range value (e.g. an unassigned register, -1) is passed in
+	bool IsAvailable(uint32 index) const
+	{
+		return index < 64 && (m_regBitmask & ((uint64)1 << index)) != 0;
+	}
+
+	// intersection: keeps only registers which are available in both sets
+	IMLPhysRegisterSet& operator&=(const IMLPhysRegisterSet& other)
+	{
+		this->m_regBitmask &= other.m_regBitmask;
+		return *this;
+	}
+
+	IMLPhysRegisterSet& operator=(const IMLPhysRegisterSet& other)
+	{
+		this->m_regBitmask = other.m_regBitmask;
+		return *this;
+	}
+
+	// difference: every register available in `other` becomes unavailable in this set
+	void RemoveRegisters(const IMLPhysRegisterSet& other)
+	{
+		this->m_regBitmask &= ~other.m_regBitmask;
+	}
+
+	bool HasAnyAvailable() const
+	{
+		return m_regBitmask != 0;
+	}
+
+	bool HasExactlyOneAvailable() const
+	{
+		// a non-zero value with its lowest set bit cleared is zero only if a single bit was set
+		return m_regBitmask != 0 && (m_regBitmask & (m_regBitmask - 1)) == 0;
+	}
+
+	// returns index of first available register. Do not call when HasAnyAvailable() == false
+	IMLPhysReg GetFirstAvailableReg() const
+	{
+		cemu_assert_debug(m_regBitmask != 0);
+		// index of the lowest set bit
+		return (IMLPhysReg)std::countr_zero(m_regBitmask);
+	}
+
+	// returns index of next available register (search includes any register index >= startIndex)
+	// returns -1 if there is no more register
+	// a negative startIndex is treated like 0
+	IMLPhysReg GetNextAvailableReg(sint32 startIndex) const
+	{
+		if (startIndex >= 64)
+			return -1;
+		if (startIndex < 0)
+			startIndex = 0;
+		// drop all registers below startIndex, then locate the lowest remaining bit
+		const uint64 remaining = m_regBitmask >> startIndex;
+		if (remaining == 0)
+			return -1;
+		return (IMLPhysReg)(startIndex + std::countr_zero(remaining));
+	}
+
+	sint32 CountAvailableRegs() const
+	{
+		return std::popcount(m_regBitmask);
+	}
+
+private:
+	uint64 m_regBitmask{ 0 }; // bit N set -> register N is available
+};
+
+struct IMLRegisterAllocatorParameters // input passed by the caller to IMLRegisterAllocator_AllocateRegisters
+{
+ inline IMLPhysRegisterSet& GetPhysRegPool(IMLRegFormat regFormat) // returns the pool stored for regFormat (array is indexed by the enum's underlying value)
+ {
+ return perTypePhysPool[stdx::to_underlying(regFormat)];
+ }
+
+ IMLPhysRegisterSet perTypePhysPool[stdx::to_underlying(IMLRegFormat::TYPE_COUNT)]; // one register set per IMLRegFormat value below TYPE_COUNT
+ std::unordered_map regIdToName; // presumably maps a virtual register id to its name - TODO confirm key/value types
+};
+
+void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam);
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp
new file mode 100644
index 00000000..583d5905
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp
@@ -0,0 +1,635 @@
+#include "../PPCRecompiler.h"
+#include "../PPCRecompilerIml.h"
+#include "IMLRegisterAllocatorRanges.h"
+#include "util/helpers/MemoryPool.h"
+
+uint32 IMLRA_GetNextIterationIndex();
+
+IMLRegID raLivenessRange::GetVirtualRegister() const // virtual register id this range was created for
+{
+ return virtualRegister;
+}
+
+sint32 raLivenessRange::GetPhysicalRegister() const // assigned physical register; IMLRA_CreateRange initialises it to -1
+{
+ return physicalRegister;
+}
+
+IMLName raLivenessRange::GetName() const // name given to the range at creation
+{
+ return name;
+}
+
+void raLivenessRange::SetPhysicalRegister(IMLPhysReg physicalRegister) // sets the physical register of this range only, linked ranges are left untouched
+{
+ this->physicalRegister = physicalRegister; // parameter shadows the member, hence this->
+}
+
+// Assigns physicalRegister to every range returned by GetAllSubrangesInCluster() (which includes this range).
+void raLivenessRange::SetPhysicalRegisterForCluster(IMLPhysReg physicalRegister)
+{
+	for (raLivenessRange* member : GetAllSubrangesInCluster())
+	{
+		member->physicalRegister = physicalRegister;
+	}
+}
+
+// Collects this range and every range reachable from it through the successor links
+// (subrangeBranchTaken / subrangeBranchNotTaken) and the predecessor list (previousRanges).
+// A fresh iteration index is stored in lastIterationIndex of each collected range so that
+// every range is added only once. The returned list starts with this range.
+// NOTE(review): the template arguments of small_vector are not visible in this declaration - confirm against the header
+boost::container::small_vector raLivenessRange::GetAllSubrangesInCluster()
+{
+	uint32 iterationIndex = IMLRA_GetNextIterationIndex();
+	boost::container::small_vector subranges;
+	subranges.push_back(this);
+	this->lastIterationIndex = iterationIndex;
+	// subranges doubles as the work list: entries appended below are processed by later rounds
+	size_t i = 0;
+	while(i < subranges.size())
+	{
+		raLivenessRange* cur = subranges[i];
+		i++;
+		// check successors
+		if(cur->subrangeBranchTaken && cur->subrangeBranchTaken->lastIterationIndex != iterationIndex)
+		{
+			cur->subrangeBranchTaken->lastIterationIndex = iterationIndex;
+			subranges.push_back(cur->subrangeBranchTaken);
+		}
+		if(cur->subrangeBranchNotTaken && cur->subrangeBranchNotTaken->lastIterationIndex != iterationIndex)
+		{
+			cur->subrangeBranchNotTaken->lastIterationIndex = iterationIndex;
+			subranges.push_back(cur->subrangeBranchNotTaken);
+		}
+		// check predecessors
+		for(auto& prev : cur->previousRanges)
+		{
+			if(prev->lastIterationIndex != iterationIndex)
+			{
+				prev->lastIterationIndex = iterationIndex;
+				subranges.push_back(prev);
+			}
+		}
+	}
+	return subranges;
+}
+
+// Recursive helper of GetAllowedRegistersEx: intersects allowedRegs with the fixed register
+// requirements of range and of every range reachable through successor and predecessor links.
+// Ranges whose lastIterationIndex already equals iterationIndex are skipped.
+void raLivenessRange::GetAllowedRegistersExRecursive(raLivenessRange* range, uint32 iterationIndex, IMLPhysRegisterSet& allowedRegs)
+{
+	range->lastIterationIndex = iterationIndex;
+	for (auto& requirement : range->list_fixedRegRequirements)
+		allowedRegs &= requirement.allowedReg;
+	// successors first (taken, then not taken)
+	raLivenessRange* takenRange = range->subrangeBranchTaken;
+	if (takenRange && takenRange->lastIterationIndex != iterationIndex)
+		GetAllowedRegistersExRecursive(takenRange, iterationIndex, allowedRegs);
+	raLivenessRange* notTakenRange = range->subrangeBranchNotTaken;
+	if (notTakenRange && notTakenRange->lastIterationIndex != iterationIndex)
+		GetAllowedRegistersExRecursive(notTakenRange, iterationIndex, allowedRegs);
+	// then predecessors
+	for (auto& predecessor : range->previousRanges)
+	{
+		if (predecessor->lastIterationIndex == iterationIndex)
+			continue;
+		GetAllowedRegistersExRecursive(predecessor, iterationIndex, allowedRegs);
+	}
+}
+
+// Computes the intersection of all fixed register requirements of this range and its linked ranges.
+// allowedRegisters is reset to "all available" first. Returns true if at least one register was excluded.
+bool raLivenessRange::GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters)
+{
+	const uint32 walkIndex = IMLRA_GetNextIterationIndex();
+	allowedRegisters.SetAllAvailable();
+	GetAllowedRegistersExRecursive(this, walkIndex, allowedRegisters);
+	const bool unrestricted = allowedRegisters.HasAllAvailable();
+	return !unrestricted;
+}
+
+// Returns regPool reduced by the fixed register requirements that apply to this range.
+// For a range which connects to a previous or next segment the requirements of all ranges in
+// its cluster are applied, otherwise only its own.
+IMLPhysRegisterSet raLivenessRange::GetAllowedRegisters(IMLPhysRegisterSet regPool)
+{
+	IMLPhysRegisterSet allowed = regPool;
+	const bool spansSegments = interval.ExtendsPreviousSegment() || interval.ExtendsIntoNextSegment();
+	if (!spansSegments)
+	{
+		// local range: only its own requirements matter
+		for (auto& requirement : list_fixedRegRequirements)
+			allowed &= requirement.allowedReg;
+		return allowed;
+	}
+	for (auto& member : GetAllSubrangesInCluster())
+	{
+		for (auto& requirement : member->list_fixedRegRequirements)
+			allowed &= requirement.allowedReg;
+	}
+	return allowed;
+}
+
+// Inserts subrange at the head of the doubly linked list which root stores for the range's
+// virtual register. A new list is started if root has no entry for that register yet.
+void PPCRecRARange_addLink_perVirtualGPR(std::unordered_map& root, raLivenessRange* subrange)
+{
+	const IMLRegID regId = subrange->GetVirtualRegister();
+	// the new element always becomes the head, so it never has a predecessor
+	subrange->link_sameVirtualRegister.prev = nullptr;
+	auto existing = root.find(regId);
+	if (existing != root.end())
+	{
+		// push in front of the current head
+		raLivenessRange* oldHead = existing->second;
+		oldHead->link_sameVirtualRegister.prev = subrange;
+		subrange->link_sameVirtualRegister.next = oldHead;
+		existing->second = subrange;
+		return;
+	}
+	// first range for this register
+	subrange->link_sameVirtualRegister.next = nullptr;
+	root.try_emplace(regId, subrange);
+}
+
+// Inserts subrange at the head of the doubly linked list whose head pointer is *root.
+void PPCRecRARange_addLink_allSegmentRanges(raLivenessRange** root, raLivenessRange* subrange)
+{
+	raLivenessRange* oldHead = *root;
+	subrange->link_allSegmentRanges.next = oldHead;
+	if (oldHead != nullptr)
+		oldHead->link_allSegmentRanges.prev = subrange;
+	subrange->link_allSegmentRanges.prev = nullptr; // new head has no predecessor
+	*root = subrange;
+}
+
+// Unlinks subrange from the per-virtual-register list stored in root.
+// If subrange was the head of its list, root is updated to point at the next element, or the
+// entry is erased when the list becomes empty.
+// Debug builds verify beforehand that subrange is part of the list and poison the link
+// pointers of the removed range afterwards.
+void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_map& root, raLivenessRange* subrange)
+{
+#ifdef CEMU_DEBUG_ASSERT
+	raLivenessRange* cur = root.find(subrange->GetVirtualRegister())->second;
+	bool hasRangeFound = false;
+	while(cur)
+	{
+		if(cur == subrange)
+		{
+			hasRangeFound = true;
+			break;
+		}
+		cur = cur->link_sameVirtualRegister.next;
+	}
+	cemu_assert_debug(hasRangeFound);
+#endif
+	IMLRegID regId = subrange->GetVirtualRegister();
+	raLivenessRange* nextRange = subrange->link_sameVirtualRegister.next;
+	raLivenessRange* prevRange = subrange->link_sameVirtualRegister.prev;
+	// bridge the neighbours over the removed element
+	if (prevRange)
+		prevRange->link_sameVirtualRegister.next = nextRange;
+	if (nextRange)
+		nextRange->link_sameVirtualRegister.prev = prevRange;
+
+	if (!prevRange)
+	{
+		// subrange was the head of the list, so the map entry has to change
+		if (nextRange)
+		{
+			root.find(regId)->second = nextRange;
+		}
+		else
+		{
+			cemu_assert_debug(root.find(regId)->second == subrange);
+			root.erase(regId);
+		}
+	}
+#ifdef CEMU_DEBUG_ASSERT
+	// poison the links so that use of a removed range is easier to spot
+	subrange->link_sameVirtualRegister.prev = (raLivenessRange*)1;
+	subrange->link_sameVirtualRegister.next = (raLivenessRange*)1;
+#endif
+}
+
+// Unlinks subrange from the doubly linked list whose head pointer is *root.
+// Debug builds poison the link pointers of the removed range afterwards.
+void PPCRecRARange_removeLink_allSegmentRanges(raLivenessRange** root, raLivenessRange* subrange)
+{
+	raLivenessRange* before = subrange->link_allSegmentRanges.prev;
+	if (before)
+		before->link_allSegmentRanges.next = subrange->link_allSegmentRanges.next;
+	else
+		*root = subrange->link_allSegmentRanges.next; // subrange was the head
+	raLivenessRange* after = subrange->link_allSegmentRanges.next;
+	if (after)
+		after->link_allSegmentRanges.prev = before;
+#ifdef CEMU_DEBUG_ASSERT
+	subrange->link_allSegmentRanges.prev = (raLivenessRange*)1;
+	subrange->link_allSegmentRanges.next = (raLivenessRange*)1;
+#endif
+}
+
+MemoryPoolPermanentObjects memPool_livenessSubrange(4096); // pool from which IMLRA_CreateRange acquires and to which IMLRA_DeleteRange releases its range objects
+
+// Acquires a range object from memPool_livenessSubrange, resets all of its fields and links it
+// into both range lists of imlSegment. The range starts without a physical register (-1),
+// without successor links and with all flags cleared.
+// startPosition and endPosition are inclusive
+raLivenessRange* IMLRA_CreateRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition)
+{
+	cemu_assert_debug(startPosition <= endPosition);
+	raLivenessRange* newRange = memPool_livenessSubrange.acquireObj();
+	// the lists are cleared explicitly before the object is used
+	newRange->previousRanges.clear();
+	newRange->list_accessLocations.clear();
+	newRange->list_fixedRegRequirements.clear();
+	cemu_assert_debug(newRange->previousRanges.empty());
+	// location
+	newRange->imlSegment = imlSegment;
+	newRange->interval.start = startPosition;
+	newRange->interval.end = endPosition;
+	// register mapping
+	newRange->virtualRegister = virtualRegister;
+	newRange->name = name;
+	newRange->physicalRegister = -1;
+	// flags and links
+	newRange->_noLoad = false;
+	newRange->hasStore = false;
+	newRange->hasStoreDelayed = false;
+	newRange->lastIterationIndex = 0;
+	newRange->subrangeBranchTaken = nullptr;
+	newRange->subrangeBranchNotTaken = nullptr;
+	// register the range with its segment
+	PPCRecRARange_addLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualRegister, newRange);
+	PPCRecRARange_addLink_allSegmentRanges(&imlSegment->raInfo.linkedList_allSubranges, newRange);
+	return newRange;
+}
+
+// Detaches range from everything that references it: both range lists of its segment, the
+// previousRanges lists of its successors and the successor pointers of its predecessors.
+// The range's own successor pointers are overwritten with an invalid marker value.
+void _unlinkSubrange(raLivenessRange* range)
+{
+	auto& segmentRaInfo = range->imlSegment->raInfo;
+	PPCRecRARange_removeLink_perVirtualGPR(segmentRaInfo.linkedList_perVirtualRegister, range);
+	PPCRecRARange_removeLink_allSegmentRanges(&segmentRaInfo.linkedList_allSubranges, range);
+	// unlink reverse references held by the successors
+	auto dropFromPredecessorList = [range](raLivenessRange* successor) {
+		if (!successor)
+			return;
+		auto& predecessors = successor->previousRanges;
+		predecessors.erase(std::find(predecessors.begin(), predecessors.end(), range));
+	};
+	dropFromPredecessorList(range->subrangeBranchTaken);
+	dropFromPredecessorList(range->subrangeBranchNotTaken);
+	range->subrangeBranchTaken = (raLivenessRange*)(uintptr_t)-1;
+	range->subrangeBranchNotTaken = (raLivenessRange*)(uintptr_t)-1;
+	// remove forward references held by the predecessors
+	for (auto& predecessor : range->previousRanges)
+	{
+		if (predecessor->subrangeBranchTaken == range)
+			predecessor->subrangeBranchTaken = nullptr;
+		if (predecessor->subrangeBranchNotTaken == range)
+			predecessor->subrangeBranchNotTaken = nullptr;
+	}
+	range->previousRanges.clear();
+}
+
+// Unlinks range from all lists and neighbouring ranges, empties its location lists and hands
+// the object back to memPool_livenessSubrange.
+void IMLRA_DeleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* range)
+{
+	_unlinkSubrange(range);
+	// drop per-range data before the object goes back to the pool
+	range->list_fixedRegRequirements.clear();
+	range->list_accessLocations.clear();
+	memPool_livenessSubrange.releaseObj(range);
+}
+
+// Deletes range together with every range returned by its GetAllSubrangesInCluster().
+void IMLRA_DeleteRangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* range)
+{
+	// the member list is collected up front, before any range is deleted
+	for (auto& member : range->GetAllSubrangesInCluster())
+	{
+		IMLRA_DeleteRange(ppcImlGenContext, member);
+	}
+}
+
+// Deletes every range of every segment and resets the per-segment range lists.
+void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext)
+{
+	for (auto& segment : ppcImlGenContext->segmentList2)
+	{
+		auto& raInfo = segment->raInfo;
+		// IMLRA_DeleteRange unlinks the head, so the list shrinks by one every round
+		while (raLivenessRange* head = raInfo.linkedList_allSubranges)
+			IMLRA_DeleteRange(ppcImlGenContext, head);
+		raInfo.linkedList_allSubranges = nullptr;
+		raInfo.linkedList_perVirtualRegister.clear();
+	}
+}
+
+void IMLRA_MergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange) // merges absorbedSubrange into subrange (same segment, absorbed range starts where subrange ends) and deletes absorbedSubrange
+{
+#ifdef CEMU_DEBUG_ASSERT
+ PPCRecRA_debugValidateSubrange(subrange);
+ PPCRecRA_debugValidateSubrange(absorbedSubrange);
+ if (subrange->imlSegment != absorbedSubrange->imlSegment)
+ assert_dbg();
+ cemu_assert_debug(subrange->interval.end == absorbedSubrange->interval.start);
+
+ if (subrange->subrangeBranchTaken || subrange->subrangeBranchNotTaken) // the surviving range must not have successors of its own, they are taken over from the absorbed range below
+ assert_dbg();
+ if (subrange == absorbedSubrange)
+ assert_dbg();
+#endif
+ // update references
+ subrange->subrangeBranchTaken = absorbedSubrange->subrangeBranchTaken;
+ subrange->subrangeBranchNotTaken = absorbedSubrange->subrangeBranchNotTaken;
+ absorbedSubrange->subrangeBranchTaken = nullptr; // cleared so that deleting the absorbed range below does not touch the successors
+ absorbedSubrange->subrangeBranchNotTaken = nullptr;
+ if(subrange->subrangeBranchTaken)
+ *std::find(subrange->subrangeBranchTaken->previousRanges.begin(), subrange->subrangeBranchTaken->previousRanges.end(), absorbedSubrange) = subrange; // replaces the back reference in place; relies on absorbedSubrange being present in the list
+ if(subrange->subrangeBranchNotTaken)
+ *std::find(subrange->subrangeBranchNotTaken->previousRanges.begin(), subrange->subrangeBranchNotTaken->previousRanges.end(), absorbedSubrange) = subrange;
+
+ // merge usage locations
+ for (auto& accessLoc : absorbedSubrange->list_accessLocations)
+ subrange->list_accessLocations.push_back(accessLoc);
+ absorbedSubrange->list_accessLocations.clear();
+ // merge fixed reg locations
+#ifdef CEMU_DEBUG_ASSERT
+ if(!subrange->list_fixedRegRequirements.empty() && !absorbedSubrange->list_fixedRegRequirements.empty())
+ {
+ cemu_assert_debug(subrange->list_fixedRegRequirements.back().pos < absorbedSubrange->list_fixedRegRequirements.front().pos); // appending must keep the requirements ordered by position
+ }
+#endif
+ for (auto& fixedReg : absorbedSubrange->list_fixedRegRequirements)
+ subrange->list_fixedRegRequirements.push_back(fixedReg);
+ absorbedSubrange->list_fixedRegRequirements.clear();
+
+ subrange->interval.end = absorbedSubrange->interval.end;
+
+ PPCRecRA_debugValidateSubrange(subrange); // called outside of the CEMU_DEBUG_ASSERT guard
+
+ IMLRA_DeleteRange(ppcImlGenContext, absorbedSubrange);
+}
+
+// Dissolves the cluster that originRange belongs to.
+// For every cluster member with at least one access location a new range is created whose interval spans exactly
+// from the first to the last access location; access locations and fixed register requirements are copied over.
+// Members without access locations get no replacement. Finally the whole original cluster is deleted
+void IMLRA_ExplodeRangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange)
+{
+	// only call this on ranges that span multiple segments
+	cemu_assert_debug(originRange->interval.ExtendsPreviousSegment() || originRange->interval.ExtendsIntoNextSegment());
+	auto members = originRange->GetAllSubrangesInCluster();
+	for (auto& member : members)
+	{
+		// members which are never accessed simply disappear
+		if (member->list_accessLocations.empty())
+			continue;
+		// the replacement covers first access .. last access
+		raInterval trimmed;
+		trimmed.SetInterval(member->list_accessLocations.front().pos, member->list_accessLocations.back().pos);
+		raLivenessRange* replacement = IMLRA_CreateRange(ppcImlGenContext, member->imlSegment, member->GetVirtualRegister(), member->GetName(), trimmed.start, trimmed.end);
+		// carry over the access locations and the fixed register requirements
+		replacement->list_accessLocations = member->list_accessLocations;
+		replacement->list_fixedRegRequirements = member->list_fixedRegRequirements;
+		if (originRange->HasPhysicalRegister())
+		{
+			// avoid unassigning a register from a range with a fixed register requirement
+			cemu_assert_debug(member->list_fixedRegRequirements.empty());
+		}
+		// sanity check: everything that was copied has to lie inside the new interval
+		if (!replacement->list_accessLocations.empty())
+		{
+			cemu_assert_debug(replacement->list_accessLocations.front().pos >= replacement->interval.start);
+			cemu_assert_debug(replacement->list_accessLocations.back().pos <= replacement->interval.end);
+		}
+		if (!replacement->list_fixedRegRequirements.empty())
+		{
+			// fixed register requirements outside of the actual access range probably means there is a mistake in GetInstructionFixedRegisters()
+			cemu_assert_debug(replacement->list_fixedRegRequirements.front().pos >= replacement->interval.start);
+			cemu_assert_debug(replacement->list_fixedRegRequirements.back().pos <= replacement->interval.end);
+		}
+	}
+	// the replacements are in place, drop the original cluster
+	IMLRA_DeleteRangeCluster(ppcImlGenContext, originRange);
+}
+
+#ifdef CEMU_DEBUG_ASSERT
+// Debug-only consistency check of a single range. Verifies that
+// - successor ranges live in the matching successor segment of this range's segment
+// - a range with successors ends on the next-segment marker and a range with predecessors starts on the previous-segment marker
+// - first/last access location and first/last fixed register requirement lie inside the interval
+// - fixed register requirements are strictly ordered by position
+void PPCRecRA_debugValidateSubrange(raLivenessRange* range)
+{
+	raLivenessRange* const takenRange = range->subrangeBranchTaken;
+	raLivenessRange* const notTakenRange = range->subrangeBranchNotTaken;
+	// successor links have to agree with the segment graph
+	if (takenRange && takenRange->imlSegment != range->imlSegment->nextSegmentBranchTaken)
+		assert_dbg();
+	if (notTakenRange && notTakenRange->imlSegment != range->imlSegment->nextSegmentBranchNotTaken)
+		assert_dbg();
+
+	// linked ranges have to reach the respective segment border
+	if (takenRange || notTakenRange)
+	{
+		cemu_assert_debug(range->interval.end.ConnectsToNextSegment());
+	}
+	if (!range->previousRanges.empty())
+	{
+		cemu_assert_debug(range->interval.start.ConnectsToPreviousSegment());
+	}
+	// access locations have to be inside the interval
+	const auto& locations = range->list_accessLocations;
+	if (!locations.empty())
+	{
+		cemu_assert_debug(locations.front().pos >= range->interval.start);
+		cemu_assert_debug(locations.back().pos <= range->interval.end);
+	}
+	// fixed register requirements have to be inside the interval and strictly ascending
+	const auto& fixedRegs = range->list_fixedRegRequirements;
+	if (!fixedRegs.empty())
+	{
+		cemu_assert_debug(fixedRegs.front().pos >= range->interval.start);
+		cemu_assert_debug(fixedRegs.back().pos <= range->interval.end);
+		for (size_t next = 1; next < fixedRegs.size(); next++)
+			cemu_assert_debug(fixedRegs[next - 1].pos < fixedRegs[next].pos);
+	}
+}
+#else
+// validation is compiled out when CEMU_DEBUG_ASSERT is not defined
+void PPCRecRA_debugValidateSubrange(raLivenessRange* range) {}
+#endif
+
+// Shrinks the interval of the range to its first and last access location.
+// An endpoint that connects to the previous/next segment (i.e. is not an instruction index) is never moved.
+// A range without access locations is collapsed onto its segment-connecting endpoint
+void IMLRA_TrimRangeToUse(raLivenessRange* range)
+{
+	raInterval& interval = range->interval;
+	const auto& locations = range->list_accessLocations;
+	if (locations.empty())
+	{
+		// special case where we trim ranges extending from other segments to a single instruction edge
+		cemu_assert_debug(!interval.start.IsInstructionIndex() || !interval.end.IsInstructionIndex());
+		if (interval.start.IsInstructionIndex())
+			interval.start = interval.end;
+		if (interval.end.IsInstructionIndex())
+			interval.end = interval.start;
+		return;
+	}
+	// remember the untrimmed interval for the containment check below
+	const raInterval untrimmed = interval;
+	// only endpoints inside the segment are pulled in
+	if (interval.start.IsInstructionIndex())
+		interval.start = locations.front().pos;
+	if (interval.end.IsInstructionIndex())
+		interval.end = locations.back().pos;
+	// extra checks
+#ifdef CEMU_DEBUG_ASSERT
+	cemu_assert_debug(interval.start <= interval.end);
+	for (const auto& location : locations)
+	{
+		cemu_assert_debug(interval.ContainsEdge(location.pos));
+	}
+	// trimming may only ever shrink the interval
+	cemu_assert_debug(untrimmed.ContainsWholeInterval(interval));
+#endif
+}
+
+// split range at the given position
+// After the split there will be two ranges:
+// head -> subrange is shortened to end at splitPosition (exclusive)
+// tail -> a new subrange that ranges from splitPosition (inclusive) to the end of the original subrange
+// The tail is assigned the same physical register as the head and takes over the head's branch-taken/not-taken successor links
+// Access locations and fixed register requirements are distributed between head and tail according to their position
+// The return value is the tail range
+// If trimToUsage is true, the end of the head subrange and the start of the tail subrange will be shrunk to fit the read/write locations within.
+// A head or tail that has no access locations and whose interval lies completely inside the segment is deleted instead;
+// in that case subrange is set to nullptr (head) and/or nullptr is returned (tail)
+raLivenessRange* IMLRA_SplitRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToUsage)
+{
+	cemu_assert_debug(splitPosition.IsInstructionIndex());
+	cemu_assert_debug(!subrange->interval.IsNextSegmentOnly() && !subrange->interval.IsPreviousSegmentOnly());
+	cemu_assert_debug(subrange->interval.ContainsEdge(splitPosition));
+	// determine new intervals
+	raInterval headInterval, tailInterval;
+	headInterval.SetInterval(subrange->interval.start, splitPosition-1);
+	tailInterval.SetInterval(splitPosition, subrange->interval.end);
+	cemu_assert_debug(headInterval.start <= headInterval.end);
+	cemu_assert_debug(tailInterval.start <= tailInterval.end);
+	// create tail
+	raLivenessRange* tailSubrange = IMLRA_CreateRange(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), tailInterval.start, tailInterval.end);
+	tailSubrange->SetPhysicalRegister(subrange->GetPhysicalRegister());
+	// carry over branch targets and update reverse references
+	tailSubrange->subrangeBranchTaken = subrange->subrangeBranchTaken;
+	tailSubrange->subrangeBranchNotTaken = subrange->subrangeBranchNotTaken;
+	subrange->subrangeBranchTaken = nullptr;
+	subrange->subrangeBranchNotTaken = nullptr;
+	// NOTE(review): std::find is dereferenced unchecked, this assumes the head is always listed in previousRanges of its successors
+	if(tailSubrange->subrangeBranchTaken)
+		*std::find(tailSubrange->subrangeBranchTaken->previousRanges.begin(), tailSubrange->subrangeBranchTaken->previousRanges.end(), subrange) = tailSubrange;
+	if(tailSubrange->subrangeBranchNotTaken)
+		*std::find(tailSubrange->subrangeBranchNotTaken->previousRanges.begin(), tailSubrange->subrangeBranchNotTaken->previousRanges.end(), subrange) = tailSubrange;
+	// we assume that list_accessLocations is ordered by position and contains no duplicates, so lets check that here just in case
+#ifdef CEMU_DEBUG_ASSERT
+	if(subrange->list_accessLocations.size() > 1)
+	{
+		for(size_t i = 0; i < subrange->list_accessLocations.size()-1; i++)
+		{
+			cemu_assert_debug(subrange->list_accessLocations[i].pos < subrange->list_accessLocations[i+1].pos);
+		}
+	}
+#endif
+	// split locations: everything at or after splitPosition moves to the tail
+	auto it = std::lower_bound(
+		subrange->list_accessLocations.begin(), subrange->list_accessLocations.end(), splitPosition,
+		[](const raAccessLocation& accessLoc, raInstructionEdge value) { return accessLoc.pos < value; }
+	);
+	size_t originalCount = subrange->list_accessLocations.size();
+	tailSubrange->list_accessLocations.insert(tailSubrange->list_accessLocations.end(), it, subrange->list_accessLocations.end());
+	subrange->list_accessLocations.erase(it, subrange->list_accessLocations.end());
+	cemu_assert_debug(subrange->list_accessLocations.empty() || subrange->list_accessLocations.back().pos < splitPosition);
+	cemu_assert_debug(tailSubrange->list_accessLocations.empty() || tailSubrange->list_accessLocations.front().pos >= splitPosition);
+	cemu_assert_debug(subrange->list_accessLocations.size() + tailSubrange->list_accessLocations.size() == originalCount);
+	// split fixed reg requirements: copy those inside the tail interval to the tail
+	for (size_t i = 0; i < subrange->list_fixedRegRequirements.size(); i++)
+	{
+		raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i;
+		if (tailInterval.ContainsEdge(fixedReg->pos))
+		{
+			tailSubrange->list_fixedRegRequirements.push_back(*fixedReg);
+		}
+	}
+	// remove tail fixed reg requirements from head
+	// the list is ordered by position, so the head is truncated at the first requirement outside of the head interval
+	for (size_t i = 0; i < subrange->list_fixedRegRequirements.size(); i++)
+	{
+		raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i;
+		if (!headInterval.ContainsEdge(fixedReg->pos))
+		{
+			subrange->list_fixedRegRequirements.resize(i);
+			break;
+		}
+	}
+	// adjust intervals
+	subrange->interval = headInterval;
+	tailSubrange->interval = tailInterval;
+	// trim to hole
+	if(trimToUsage)
+	{
+		// a half without any access which does not connect to a neighbouring segment serves no purpose and is deleted
+		if(subrange->list_accessLocations.empty() && (subrange->interval.start.IsInstructionIndex() && subrange->interval.end.IsInstructionIndex()))
+		{
+			IMLRA_DeleteRange(ppcImlGenContext, subrange);
+			subrange = nullptr;
+		}
+		else
+		{
+			IMLRA_TrimRangeToUse(subrange);
+		}
+		if(tailSubrange->list_accessLocations.empty() && (tailSubrange->interval.start.IsInstructionIndex() && tailSubrange->interval.end.IsInstructionIndex()))
+		{
+			IMLRA_DeleteRange(ppcImlGenContext, tailSubrange);
+			tailSubrange = nullptr;
+		}
+		else
+		{
+			IMLRA_TrimRangeToUse(tailSubrange);
+		}
+	}
+	// validation
+	cemu_assert_debug(!subrange || subrange->interval.start <= subrange->interval.end);
+	cemu_assert_debug(!tailSubrange || tailSubrange->interval.start <= tailSubrange->interval.end);
+	cemu_assert_debug(!tailSubrange || tailSubrange->interval.start >= splitPosition);
+	if (!trimToUsage)
+		cemu_assert_debug(!tailSubrange || tailSubrange->interval.start == splitPosition);
+
+	if(subrange)
+		PPCRecRA_debugValidateSubrange(subrange);
+	if(tailSubrange)
+		PPCRecRA_debugValidateSubrange(tailSubrange);
+	return tailSubrange;
+}
+
+// Cost of a single register load or store inside the given segment.
+// Grows quadratically with the loop depth: ((loopDepth + 1) * 5)^2 -> 25, 100, 225, 400, ...
+sint32 IMLRA_GetSegmentReadWriteCost(IMLSegment* imlSegment)
+{
+	const sint32 depthLevel = imlSegment->loopDepth + 1;
+	const sint32 scaled = depthLevel * 5;
+	return scaled * scaled;
+}
+
+// calculate additional cost of range that it would have after calling IMLRA_ExplodeRangeCluster() on it
+// Every cluster member which currently receives its value from the previous segment needs an extra load once it is
+// made local, and every member which currently passes its value on to the next segment needs an extra store.
+// Same cost model as in IMLRA_CalculateAdditionalCostAfterSplit: a load is only required if the first access is not a
+// write, a store is only required if the value is written at all.
+// Returns the summed cost, weighted per segment by IMLRA_GetSegmentReadWriteCost()
+sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange)
+{
+	auto ranges = subrange->GetAllSubrangesInCluster();
+	sint32 cost = 0;
+	for (auto& clusterRange : ranges)
+	{
+		if (clusterRange->list_accessLocations.empty())
+			continue; // this range would be deleted and thus has no cost
+		sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(clusterRange->imlSegment);
+		bool hasAdditionalLoad = clusterRange->interval.ExtendsPreviousSegment();
+		bool hasAdditionalStore = clusterRange->interval.ExtendsIntoNextSegment();
+		// if written before read then a load isn't necessary
+		if (hasAdditionalLoad && !clusterRange->list_accessLocations.front().IsWrite())
+			cost += segmentLoadStoreCost;
+		if (hasAdditionalStore)
+		{
+			// ranges which don't modify their value do not need to be stored
+			bool hasWrite = std::any_of(clusterRange->list_accessLocations.begin(), clusterRange->list_accessLocations.end(), [](const raAccessLocation& loc) { return loc.IsWrite(); });
+			if (hasWrite)
+				cost += segmentLoadStoreCost;
+		}
+	}
+	// todo - properly calculating all the data-flow dependency based costs is more complex so this currently is an approximation
+	return cost;
+}
+
+// Estimates how much more expensive the range becomes if it is split at splitPosition.
+// The cost of a set of access locations is one load if its first access is a read plus one store if it contains a write.
+// The result is (cost of head + cost of tail) - (cost of the unsplit range), weighted by IMLRA_GetSegmentReadWriteCost().
+// Returns 0 if the range has no access locations or if all of them end up on the same side of the split.
+// Must not be called on ranges which extend into the next segment (asserted in debug builds)
+sint32 IMLRA_CalculateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition)
+{
+	// validation
+#ifdef CEMU_DEBUG_ASSERT
+	if (subrange->interval.ExtendsIntoNextSegment())
+		assert_dbg();
+#endif
+	cemu_assert_debug(splitPosition.IsInstructionIndex());
+
+	// a split which does not separate any access locations is free
+	if (subrange->list_accessLocations.empty())
+		return 0;
+	if (splitPosition <= subrange->list_accessLocations.front().pos)
+		return 0;
+	if (splitPosition > subrange->list_accessLocations.back().pos)
+		return 0;
+
+	// find split position in location list
+	// the early-outs above guarantee that at least one location is at or after splitPosition
+	size_t firstTailLocationIndex = 0;
+	for (size_t i = 0; i < subrange->list_accessLocations.size(); i++)
+	{
+		if (subrange->list_accessLocations[i].pos >= splitPosition)
+		{
+			firstTailLocationIndex = i;
+			break;
+		}
+	}
+	std::span<const raAccessLocation> headLocations{subrange->list_accessLocations.data(), firstTailLocationIndex};
+	std::span<const raAccessLocation> tailLocations{subrange->list_accessLocations.data() + firstTailLocationIndex, subrange->list_accessLocations.size() - firstTailLocationIndex};
+	cemu_assert_debug(headLocations.empty() || headLocations.back().pos < splitPosition);
+	cemu_assert_debug(tailLocations.empty() || tailLocations.front().pos >= splitPosition);
+
+	sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment);
+
+	// load/store cost of one contiguous run of access locations
+	auto CalculateCostFromLocationRange = [segmentLoadStoreCost](std::span<const raAccessLocation> locations, bool trackLoadCost = true, bool trackStoreCost = true) -> sint32
+	{
+		if(locations.empty())
+			return 0;
+		sint32 cost = 0;
+		if(locations.front().IsRead() && trackLoadCost)
+			cost += segmentLoadStoreCost; // not overwritten, so there is a load cost
+		bool hasWrite = std::find_if(locations.begin(), locations.end(), [](const raAccessLocation& loc) { return loc.IsWrite(); }) != locations.end();
+		if(hasWrite && trackStoreCost)
+			cost += segmentLoadStoreCost; // modified, so there is a store cost
+		return cost;
+	};
+
+	sint32 baseCost = CalculateCostFromLocationRange(subrange->list_accessLocations);
+
+	// a tail which starts by overwriting the value does not have to load it
+	bool tailOverwritesValue = !tailLocations.empty() && !tailLocations.front().IsRead() && tailLocations.front().IsWrite();
+
+	sint32 newCost = CalculateCostFromLocationRange(headLocations) + CalculateCostFromLocationRange(tailLocations, !tailOverwritesValue, true);
+	cemu_assert_debug(newCost >= baseCost);
+	return newCost - baseCost;
+}
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h
new file mode 100644
index 00000000..b0685cc5
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h
@@ -0,0 +1,364 @@
+#pragma once
+#include "IMLRegisterAllocator.h"
+
+// prev/next pointer pair for the linked lists that ranges are chained into.
+// raLivenessRange embeds one instance per list it is a member of (link_sameVirtualRegister, link_allSegmentRanges)
+struct raLivenessSubrangeLink
+{
+ struct raLivenessRange* prev;
+ struct raLivenessRange* next;
+};
+
+// Position inside a segment with half-instruction granularity.
+// Every instruction has an input edge (raw index = instruction index * 2) and an output edge (raw index = instruction index * 2 + 1).
+// The raw values RA_INTER_RANGE_START and RA_INTER_RANGE_END are reserved for positions which connect to the previous / next segment.
+// Comparisons operate on the raw index
+struct raInstructionEdge
+{
+	friend struct raInterval;
+public:
+	// default: input edge of instruction 0
+	raInstructionEdge() : index(0)
+	{
+	}
+
+	raInstructionEdge(sint32 instructionIndex, bool isInputEdge)
+	{
+		Set(instructionIndex, isInputEdge);
+	}
+
+	// instructionIndex may also be RA_INTER_RANGE_START or RA_INTER_RANGE_END, in which case isInputEdge is ignored
+	void Set(sint32 instructionIndex, bool isInputEdge)
+	{
+		if(instructionIndex == RA_INTER_RANGE_START || instructionIndex == RA_INTER_RANGE_END)
+		{
+			index = instructionIndex;
+			return;
+		}
+		index = instructionIndex * 2 + (isInputEdge ? 0 : 1);
+		cemu_assert_debug(index >= 0 && index < 0x100000*2); // make sure index value is sane
+	}
+
+	// sets the raw (already doubled) index or one of the RA_INTER_RANGE_* markers
+	void SetRaw(sint32 index)
+	{
+		this->index = index;
+		cemu_assert_debug(index == RA_INTER_RANGE_START || index == RA_INTER_RANGE_END || (index >= 0 && index < 0x100000*2)); // make sure index value is sane
+	}
+
+	// "RA_START", "RA_END" or the instruction index followed by 'i' (input edge) / 'o' (output edge)
+	std::string GetDebugString() const
+	{
+		if(index == RA_INTER_RANGE_START)
+			return "RA_START";
+		else if(index == RA_INTER_RANGE_END)
+			return "RA_END";
+		std::string str = fmt::format("{}", GetInstructionIndex());
+		if(IsOnInputEdge())
+			str += "i";
+		else if(IsOnOutputEdge())
+			str += "o";
+		return str;
+	}
+
+	// only valid for edges which are not RA_INTER_RANGE_START/RA_INTER_RANGE_END
+	sint32 GetInstructionIndex() const
+	{
+		cemu_assert_debug(index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END);
+		return index >> 1;
+	}
+
+	// returns instruction index or RA_INTER_RANGE_START/RA_INTER_RANGE_END
+	sint32 GetInstructionIndexEx() const
+	{
+		if(index == RA_INTER_RANGE_START || index == RA_INTER_RANGE_END)
+			return index;
+		return index >> 1;
+	}
+
+	sint32 GetRaw() const
+	{
+		return index;
+	}
+
+	bool IsOnInputEdge() const
+	{
+		cemu_assert_debug(index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END);
+		return (index&1) == 0;
+	}
+
+	bool IsOnOutputEdge() const
+	{
+		cemu_assert_debug(index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END);
+		return (index&1) != 0;
+	}
+
+	bool ConnectsToPreviousSegment() const
+	{
+		return index == RA_INTER_RANGE_START;
+	}
+
+	bool ConnectsToNextSegment() const
+	{
+		return index == RA_INTER_RANGE_END;
+	}
+
+	// true if the edge refers to an actual instruction and not to a segment border
+	bool IsInstructionIndex() const
+	{
+		return index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END;
+	}
+
+	// comparison operators
+	bool operator>(const raInstructionEdge& other) const
+	{
+		return index > other.index;
+	}
+	bool operator<(const raInstructionEdge& other) const
+	{
+		return index < other.index;
+	}
+	bool operator<=(const raInstructionEdge& other) const
+	{
+		return index <= other.index;
+	}
+	bool operator>=(const raInstructionEdge& other) const
+	{
+		return index >= other.index;
+	}
+	bool operator==(const raInstructionEdge& other) const
+	{
+		return index == other.index;
+	}
+
+	// offsets are measured in edges (half instructions), not in instructions
+	raInstructionEdge operator+(sint32 offset) const
+	{
+		cemu_assert_debug(IsInstructionIndex());
+		cemu_assert_debug(offset >= 0 && offset < RA_INTER_RANGE_END);
+		raInstructionEdge edge;
+		edge.index = index + offset;
+		return edge;
+	}
+
+	raInstructionEdge operator-(sint32 offset) const
+	{
+		cemu_assert_debug(IsInstructionIndex());
+		cemu_assert_debug(offset >= 0 && offset < RA_INTER_RANGE_END);
+		raInstructionEdge edge;
+		edge.index = index - offset;
+		return edge;
+	}
+
+	// advances to the next edge
+	raInstructionEdge& operator++()
+	{
+		cemu_assert_debug(IsInstructionIndex());
+		index++;
+		return *this;
+	}
+
+private:
+	sint32 index; // can also be RA_INTER_RANGE_START or RA_INTER_RANGE_END, otherwise contains instruction index * 2 (+1 for the output edge)
+
+};
+
+// A single access to the register of a range.
+// The edge encodes the kind of access: input edge = read, output edge = write
+struct raAccessLocation
+{
+	raInstructionEdge pos;
+
+	raAccessLocation(raInstructionEdge pos) : pos(pos) {}
+
+	// the access sits on the input edge of its instruction
+	bool IsRead() const { return pos.IsOnInputEdge(); }
+
+	// the access sits on the output edge of its instruction
+	bool IsWrite() const { return pos.IsOnOutputEdge(); }
+};
+
+// Closed interval [start, end] of instruction edges within one segment.
+// start may be RA_INTER_RANGE_START and end may be RA_INTER_RANGE_END to express that the interval continues
+// in the previous / next segment
+struct raInterval
+{
+	raInterval() = default;
+
+	raInterval(raInstructionEdge start, raInstructionEdge end)
+	{
+		SetInterval(start, end);
+	}
+
+	// isStartOnInput = Input+Output edge on first instruction. If false then only output
+	// isEndOnOutput = Input+Output edge on last instruction. If false then only input
+	void SetInterval(sint32 start, bool isStartOnInput, sint32 end, bool isEndOnOutput)
+	{
+		this->start.Set(start, isStartOnInput);
+		this->end.Set(end, !isEndOnOutput);
+	}
+
+	void SetInterval(raInstructionEdge start, raInstructionEdge end)
+	{
+		cemu_assert_debug(start <= end);
+		this->start = start;
+		this->end = end;
+	}
+
+	void SetStart(const raInstructionEdge& edge)
+	{
+		start = edge;
+	}
+
+	void SetEnd(const raInstructionEdge& edge)
+	{
+		end = edge;
+	}
+
+	// instruction index of the start edge, must not be a segment border marker
+	sint32 GetStartIndex() const
+	{
+		return start.GetInstructionIndex();
+	}
+
+	// instruction index of the end edge, must not be a segment border marker
+	sint32 GetEndIndex() const
+	{
+		return end.GetInstructionIndex();
+	}
+
+	bool ExtendsPreviousSegment() const
+	{
+		return start.ConnectsToPreviousSegment();
+	}
+
+	bool ExtendsIntoNextSegment() const
+	{
+		return end.ConnectsToNextSegment();
+	}
+
+	// both endpoints sit on the border to the next segment
+	bool IsNextSegmentOnly() const
+	{
+		return start.ConnectsToNextSegment() && end.ConnectsToNextSegment();
+	}
+
+	// both endpoints sit on the border to the previous segment
+	bool IsPreviousSegmentOnly() const
+	{
+		return start.ConnectsToPreviousSegment() && end.ConnectsToPreviousSegment();
+	}
+
+	// returns true if range is contained within a single segment
+	bool IsLocal() const
+	{
+		return start.GetRaw() > RA_INTER_RANGE_START && end.GetRaw() < RA_INTER_RANGE_END;
+	}
+
+	// instructionIndex is a plain instruction index (not an edge index)
+	bool ContainsInstructionIndex(sint32 instructionIndex) const
+	{
+		cemu_assert_debug(instructionIndex != RA_INTER_RANGE_START && instructionIndex != RA_INTER_RANGE_END);
+		return instructionIndex >= start.GetInstructionIndexEx() && instructionIndex <= end.GetInstructionIndexEx();
+	}
+
+	// similar to ContainsInstructionIndex, but allows RA_INTER_RANGE_START/END as input
+	bool ContainsInstructionIndexEx(sint32 instructionIndex) const
+	{
+		if(instructionIndex == RA_INTER_RANGE_START)
+			return start.ConnectsToPreviousSegment();
+		if(instructionIndex == RA_INTER_RANGE_END)
+			return end.ConnectsToNextSegment();
+		return instructionIndex >= start.GetInstructionIndexEx() && instructionIndex <= end.GetInstructionIndexEx();
+	}
+
+	bool ContainsEdge(const raInstructionEdge& edge) const
+	{
+		return edge >= start && edge <= end;
+	}
+
+	// true if other lies completely inside this interval
+	bool ContainsWholeInterval(const raInterval& other) const
+	{
+		return other.start >= start && other.end <= end;
+	}
+
+	// true if both intervals share at least one edge
+	bool IsOverlapping(const raInterval& other) const
+	{
+		return start <= other.end && end >= other.start;
+	}
+
+	// length of the interval in edges (end is inclusive)
+	// an interval with identical endpoints has length 1; for intervals starting in the previous segment the
+	// result is end.GetRaw() + 1. The end must not be a segment border marker unless start == end
+	sint32 GetPreciseDistance() const
+	{
+		cemu_assert_debug(!start.ConnectsToNextSegment()); // how to handle this?
+		if(start == end)
+			return 1;
+		cemu_assert_debug(!end.ConnectsToPreviousSegment() && !end.ConnectsToNextSegment());
+		if(start.ConnectsToPreviousSegment())
+			return end.GetRaw() + 1;
+
+		return end.GetRaw() - start.GetRaw() + 1; // +1 because end is inclusive
+	}
+
+//private: not making these directly accessible only forces us to create loads of verbose getters and setters
+	raInstructionEdge start;
+	raInstructionEdge end;
+};
+
+// Fixed register requirement of a range at a single instruction edge
+struct raFixedRegRequirement
+{
+ // edge at which the requirement applies
+ raInstructionEdge pos;
+ // register set for this position (presumably the physical registers which are permitted here - confirm against GetAllowedRegistersEx)
+ IMLPhysRegisterSet allowedReg;
+};
+
+// Liveness range of one virtual register within a single segment.
+// Ranges of the same register in neighbouring segments are connected through subrangeBranchTaken /
+// subrangeBranchNotTaken (forward) and previousRanges (backward); the connected ranges form a cluster
+struct raLivenessRange
+{
+	IMLSegment* imlSegment;
+	raInterval interval;
+
+	// dirty state tracking
+	bool _noLoad;
+	bool hasStore;
+	bool hasStoreDelayed;
+	// next
+	raLivenessRange* subrangeBranchTaken;
+	raLivenessRange* subrangeBranchNotTaken;
+	// reverse counterpart of BranchTaken/BranchNotTaken
+	// NOTE(review): the template arguments were missing in this declaration; the element type follows from usage, the inline capacity is an assumption - confirm
+	boost::container::small_vector<raLivenessRange*, 4> previousRanges;
+	// processing
+	uint32 lastIterationIndex;
+	// instruction read/write locations
+	std::vector<raAccessLocation> list_accessLocations;
+	// ordered list of all raInstructionEdge indices which require a fixed register
+	std::vector<raFixedRegRequirement> list_fixedRegRequirements;
+	// linked list (subranges with same GPR virtual register)
+	raLivenessSubrangeLink link_sameVirtualRegister;
+	// linked list (all subranges for this segment)
+	raLivenessSubrangeLink link_allSegmentRanges;
+	// register info
+	IMLRegID virtualRegister;
+	IMLName name;
+	// register allocator result (negative while no register is assigned, see HasPhysicalRegister)
+	IMLPhysReg physicalRegister;
+
+	// NOTE(review): the inline capacity has to match the definition in the .cpp file - confirm
+	boost::container::small_vector<raLivenessRange*, 128> GetAllSubrangesInCluster();
+	bool GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters); // if the cluster has fixed register requirements in any instruction this returns the combined register mask. Otherwise returns false in which case allowedRegisters is left undefined
+	IMLPhysRegisterSet GetAllowedRegisters(IMLPhysRegisterSet regPool); // return regPool with fixed register requirements filtered out
+
+	IMLRegID GetVirtualRegister() const;
+	sint32 GetPhysicalRegister() const;
+	bool HasPhysicalRegister() const { return physicalRegister >= 0; }
+	IMLName GetName() const;
+	void SetPhysicalRegister(IMLPhysReg physicalRegister);
+	void SetPhysicalRegisterForCluster(IMLPhysReg physicalRegister);
+	void UnsetPhysicalRegister() { physicalRegister = -1; }
+
+private:
+	void GetAllowedRegistersExRecursive(raLivenessRange* range, uint32 iterationIndex, IMLPhysRegisterSet& allowedRegs);
+};
+
+// range lifetime
+raLivenessRange* IMLRA_CreateRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition);
+// unlinks the range and releases it
+void IMLRA_DeleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange);
+// deletes the ranges of every segment in the context
+void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext);
+
+// replaces the cluster of originRange by per-segment ranges trimmed to their access locations
+void IMLRA_ExplodeRangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange);
+
+// absorbedSubrange is merged into subrange and deleted afterwards
+void IMLRA_MergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange);
+
+// splits subrange at splitPosition and returns the tail. With trimToUsage the head (subrange) and/or the returned tail can end up as nullptr
+raLivenessRange* IMLRA_SplitRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToUsage = false);
+
+// consistency checks, no-op unless CEMU_DEBUG_ASSERT is defined
+void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange);
+
+// cost estimation
+sint32 IMLRA_GetSegmentReadWriteCost(IMLSegment* imlSegment);
+sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange);
+//sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, sint32 splitIndex);
+sint32 IMLRA_CalculateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition);
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp
new file mode 100644
index 00000000..f3b6834f
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp
@@ -0,0 +1,133 @@
+#include "IMLInstruction.h"
+#include "IMLSegment.h"
+
+// Marks the segment as enterable at the given PPC address.
+// Debug builds assert that an already enterable segment is not assigned a different address
+void IMLSegment::SetEnterable(uint32 enterAddress)
+{
+	cemu_assert_debug(!isEnterable || enterPPCAddress == enterAddress);
+	enterPPCAddress = enterAddress;
+	isEnterable = true;
+}
+
+// Returns true if the segment is not empty and its last instruction is a suffix instruction
+bool IMLSegment::HasSuffixInstruction() const
+{
+	return !imlList.empty() && imlList.back().IsSuffixInstruction();
+}
+
+// Index of the suffix instruction, which is always the last instruction of the segment.
+// Only valid if HasSuffixInstruction() is true (asserted in debug builds)
+sint32 IMLSegment::GetSuffixInstructionIndex() const
+{
+	cemu_assert_debug(HasSuffixInstruction());
+	const size_t lastIndex = imlList.size() - 1;
+	return (sint32)lastIndex;
+}
+
+// Returns a pointer to the last instruction of the segment or nullptr if the segment has no instructions
+IMLInstruction* IMLSegment::GetLastInstruction()
+{
+	return imlList.empty() ? nullptr : &imlList.back();
+}
+
+// Replaces the branch-not-taken link of this segment.
+// The back-reference to this segment is removed from the previous target (if any) and added to imlSegmentDst.
+// imlSegmentDst may be nullptr to only remove the current link
+void IMLSegment::SetLinkBranchNotTaken(IMLSegment* imlSegmentDst)
+{
+	if (nextSegmentBranchNotTaken)
+	{
+		auto& prevSegments = nextSegmentBranchNotTaken->list_prevSegments;
+		auto backRef = std::find(prevSegments.begin(), prevSegments.end(), this);
+		// erasing end() is undefined behaviour, so a missing back-reference is reported instead of erased
+		if (backRef != prevSegments.end())
+			prevSegments.erase(backRef);
+		else
+			assert_dbg();
+	}
+	nextSegmentBranchNotTaken = imlSegmentDst;
+	if (imlSegmentDst)
+		imlSegmentDst->list_prevSegments.push_back(this);
+}
+
+// Replaces the branch-taken link of this segment.
+// The back-reference to this segment is removed from the previous target (if any) and added to imlSegmentDst.
+// imlSegmentDst may be nullptr to only remove the current link
+void IMLSegment::SetLinkBranchTaken(IMLSegment* imlSegmentDst)
+{
+	if (nextSegmentBranchTaken)
+	{
+		auto& prevSegments = nextSegmentBranchTaken->list_prevSegments;
+		auto backRef = std::find(prevSegments.begin(), prevSegments.end(), this);
+		// erasing end() is undefined behaviour, so a missing back-reference is reported instead of erased
+		if (backRef != prevSegments.end())
+			prevSegments.erase(backRef);
+		else
+			assert_dbg();
+	}
+	nextSegmentBranchTaken = imlSegmentDst;
+	if (imlSegmentDst)
+		imlSegmentDst->list_prevSegments.push_back(this);
+}
+
+// Appends a new instruction to the end of the segment, zero-fills it and returns a pointer to it
+IMLInstruction* IMLSegment::AppendInstruction()
+{
+	IMLInstruction* appended = &imlList.emplace_back();
+	memset(appended, 0, sizeof(IMLInstruction));
+	return appended;
+}
+
+// Links imlSegmentSrc to imlSegmentDst via the branch-not-taken path and registers src as predecessor of dst.
+// Does nothing if the link already exists. Src is expected to have no other not-taken link at this point
+void IMLSegment_SetLinkBranchNotTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst)
+{
+	auto& notTakenLink = imlSegmentSrc->nextSegmentBranchNotTaken;
+	// nothing to do if the segments are already linked
+	if (notTakenLink == imlSegmentDst)
+		return;
+	// an existing link to a different segment has to be removed by the caller first
+	if (notTakenLink != nullptr)
+		assert_dbg();
+	// forward link
+	notTakenLink = imlSegmentDst;
+	// backward link
+	imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc);
+}
+
+// Links imlSegmentSrc to imlSegmentDst via the branch-taken path and registers src as predecessor of dst.
+// Does nothing if the link already exists. Src is expected to have no other taken link at this point
+void IMLSegment_SetLinkBranchTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst)
+{
+	auto& takenLink = imlSegmentSrc->nextSegmentBranchTaken;
+	// nothing to do if the segments are already linked
+	if (takenLink == imlSegmentDst)
+		return;
+	// an existing link to a different segment has to be removed by the caller first
+	if (takenLink != nullptr)
+		assert_dbg();
+	// forward link
+	takenLink = imlSegmentDst;
+	// backward link
+	imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc);
+}
+
+// Removes the link from imlSegmentSrc to imlSegmentDst in both directions.
+// The not-taken link is checked before the taken link; only one of them is cleared per call
+void IMLSegment_RemoveLink(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst)
+{
+	// clear the forward link
+	if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst)
+		imlSegmentSrc->nextSegmentBranchNotTaken = nullptr;
+	else if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst)
+		imlSegmentSrc->nextSegmentBranchTaken = nullptr;
+	else
+		assert_dbg();
+
+	// remove the first back-reference to src from the predecessor list of dst
+	auto& predecessors = imlSegmentDst->list_prevSegments;
+	auto backRef = std::find(predecessors.begin(), predecessors.end(), imlSegmentSrc);
+	if (backRef != predecessors.end())
+		predecessors.erase(backRef);
+	else
+		assert_dbg();
+}
+
+/*
+ * Replaces all links to segment orig with links to segment new
+ * Every predecessor of orig is detached from orig and attached to new through the same kind of link (taken / not taken)
+ */
+void IMLSegment_RelinkInputSegment(IMLSegment* imlSegmentOrig, IMLSegment* imlSegmentNew)
+{
+	// each pass removes the processed predecessor from the list, so always look at the first entry
+	while (!imlSegmentOrig->list_prevSegments.empty())
+	{
+		IMLSegment* predecessor = imlSegmentOrig->list_prevSegments.front();
+		const bool viaNotTaken = predecessor->nextSegmentBranchNotTaken == imlSegmentOrig;
+		const bool viaTaken = !viaNotTaken && predecessor->nextSegmentBranchTaken == imlSegmentOrig;
+		if (viaNotTaken)
+		{
+			IMLSegment_RemoveLink(predecessor, imlSegmentOrig);
+			IMLSegment_SetLinkBranchNotTaken(predecessor, imlSegmentNew);
+		}
+		else if (viaTaken)
+		{
+			IMLSegment_RemoveLink(predecessor, imlSegmentOrig);
+			IMLSegment_SetLinkBranchTaken(predecessor, imlSegmentNew);
+		}
+		else
+		{
+			// predecessor list and forward links are out of sync
+			assert_dbg();
+		}
+	}
+}
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h
new file mode 100644
index 00000000..10e3dc06
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h
@@ -0,0 +1,193 @@
+#pragma once
+#include "IMLInstruction.h"
+
+#include
+
+// special values to mark the index of ranges that reach across the segment border
+#define RA_INTER_RANGE_START (-1) // IMLSegmentPoint::IsPreviousSegment() tests index against this value
+#define RA_INTER_RANGE_END (0x70000000) // IMLSegmentPoint::IsNextSegment() tests index against this value
+
+struct IMLSegmentPoint // a position inside a segment, stored as a single sint32 index; comparisons below look at index only
+{
+ friend struct IMLSegmentInterval;
+
+ sint32 index; // no default member initializer here (same for the three pointers below)
+ struct IMLSegment* imlSegment; // do we really need to track this? SegmentPoints are always accessed via the segment that they are part of
+ IMLSegmentPoint* next; // presumably links the points of one segment together (see IMLSegment::segmentPointList) - TODO confirm
+ IMLSegmentPoint* prev;
+
+ // the index is the instruction index times two.
+ // this gives us the ability to cover half an instruction with RA ranges
+ // covering only the first half of an instruction (0-0) means that the register is read, but not preserved
+ // covering first and the second half means the register is read and preserved
+ // covering only the second half means the register is written but not read
+ // NOTE(review): GetInstructionIndex() below returns index unchanged (no division by two) - confirm whether the "times two" description above is still current
+ sint32 GetInstructionIndex() const
+ {
+ return index;
+ }
+
+ void SetInstructionIndex(sint32 index) // parameter shadows the member, hence the explicit this->
+ {
+ this->index = index;
+ }
+
+ void ShiftIfAfter(sint32 instructionIndex, sint32 shiftCount) // adds shiftCount to index when index >= instructionIndex (equal counts too, despite the name)
+ {
+ if (!IsPreviousSegment() && !IsNextSegment()) // the two sentinel values are never shifted
+ {
+ if (GetInstructionIndex() >= instructionIndex)
+ index += shiftCount;
+ }
+ }
+
+ void DecrementByOneInstruction() // plain index--; no check for the sentinel values is made here
+ {
+ index--;
+ }
+
+ // the segment point can point beyond the first and last instruction which indicates that it is an infinite range reaching up to the previous or next segment
+ bool IsPreviousSegment() const { return index == RA_INTER_RANGE_START; }
+ bool IsNextSegment() const { return index == RA_INTER_RANGE_END; }
+
+ // comparison operators against another point; only index is compared (imlSegment, next and prev are ignored)
+ bool operator>(const IMLSegmentPoint& other) const { return index > other.index; }
+ bool operator<(const IMLSegmentPoint& other) const { return index < other.index; }
+ bool operator==(const IMLSegmentPoint& other) const { return index == other.index; }
+ bool operator!=(const IMLSegmentPoint& other) const { return index != other.index; }
+
+ // comparison operators against a raw sint32 index
+ bool operator>(const sint32 other) const { return index > other; }
+ bool operator<(const sint32 other) const { return index < other; }
+ bool operator<=(const sint32 other) const { return index <= other; }
+ bool operator>=(const sint32 other) const { return index >= other; }
+};
+
+struct IMLSegmentInterval // a start/end pair of IMLSegmentPoint; either end may hold one of the RA_INTER_RANGE_* sentinel values
+{
+ IMLSegmentPoint start;
+ IMLSegmentPoint end;
+
+ bool ContainsInstructionIndex(sint32 offset) const { return start <= offset && end > offset; } // half-open test: start <= offset < end
+
+ bool IsRangeOverlapping(const IMLSegmentInterval& other) // NOTE(review): the only query in this struct not declared const
+ {
+ // todo - compare the raw index
+ sint32 r1start = this->start.GetInstructionIndex();
+ sint32 r1end = this->end.GetInstructionIndex();
+ sint32 r2start = other.start.GetInstructionIndex();
+ sint32 r2end = other.end.GetInstructionIndex();
+ if (r1start < r2end && r1end > r2start) // strict overlap: ranges that merely touch at an endpoint do not count
+ return true;
+ if (this->start.IsPreviousSegment() && r1start == r2start) // both ranges start at RA_INTER_RANGE_START
+ return true;
+ if (this->end.IsNextSegment() && r1end == r2end) // both ranges end at RA_INTER_RANGE_END
+ return true;
+ return false;
+ }
+
+ bool ExtendsIntoPreviousSegment() const
+ {
+ return start.IsPreviousSegment();
+ }
+
+ bool ExtendsIntoNextSegment() const
+ {
+ return end.IsNextSegment();
+ }
+
+ bool IsNextSegmentOnly() const // true when start already sits at RA_INTER_RANGE_END
+ {
+ if(!start.IsNextSegment())
+ return false;
+ cemu_assert_debug(end.IsNextSegment()); // end is expected to carry the same sentinel in that case
+ return true;
+ }
+
+ bool IsPreviousSegmentOnly() const // true when end already sits at RA_INTER_RANGE_START
+ {
+ if (!end.IsPreviousSegment())
+ return false;
+ cemu_assert_debug(start.IsPreviousSegment()); // start is expected to carry the same sentinel in that case
+ return true;
+ }
+
+ sint32 GetDistance() const // end index minus start index; sentinel values are not special-cased (see todo)
+ {
+ // todo - assert if either start or end is outside the segment
+ // we may also want to switch this to raw indices?
+ return end.GetInstructionIndex() - start.GetInstructionIndex();
+ }
+};
+
+struct PPCSegmentRegisterAllocatorInfo_t // register allocator state; IMLSegment embeds one instance as raInfo
+{
+ // used during loop detection
+ bool isPartOfProcessedLoop{}; // {} value-initializes: starts out false
+ sint32 lastIterationIndex{}; // starts out 0
+ // linked lists
+ struct raLivenessRange* linkedList_allSubranges{}; // starts out nullptr
+ std::unordered_map linkedList_perVirtualRegister;
+};
+
+struct IMLSegment // one segment of IML instructions plus its links to successor / predecessor segments
+{
+ sint32 momentaryIndex{}; // index in segment list, generally not kept up to date except if needed (necessary for loop detection)
+ sint32 loopDepth{};
+ uint32 ppcAddress{}; // ppc address (0xFFFFFFFF if not associated with an address); note the {} initializer itself yields 0
+ uint32 x64Offset{}; // x64 code offset of segment start
+ // list of intermediate instructions in this segment
+ std::vector imlList;
+ // segment link
+ IMLSegment* nextSegmentBranchNotTaken{}; // this is also the default for segments where there is no branch
+ IMLSegment* nextSegmentBranchTaken{};
+ bool nextSegmentIsUncertain{};
+ std::vector list_prevSegments{}; // back-references: IMLSegment_SetLinkBranchTaken() adds the source segment here, IMLSegment_RemoveLink() erases it again
+ // source for overwrite analysis (if nextSegmentIsUncertain is true)
+ // sometimes a segment is marked as an exit point, but for the purposes of dead code elimination we know the next segment
+ IMLSegment* deadCodeEliminationHintSeg{};
+ std::vector list_deadCodeHintBy{}; // segments that named this segment via SetNextSegmentForOverwriteHints()
+ // enterable segments
+ bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary)
+ uint32 enterPPCAddress{}; // used if isEnterable is true
+ // register allocator info
+ PPCSegmentRegisterAllocatorInfo_t raInfo{};
+ // segment state API
+ void SetEnterable(uint32 enterAddress);
+ void SetLinkBranchNotTaken(IMLSegment* imlSegmentDst);
+ void SetLinkBranchTaken(IMLSegment* imlSegmentDst);
+
+ IMLSegment* GetBranchTaken() // plain accessor, may return nullptr
+ {
+ return nextSegmentBranchTaken;
+ }
+
+ IMLSegment* GetBranchNotTaken() // plain accessor, may return nullptr
+ {
+ return nextSegmentBranchNotTaken;
+ }
+
+ void SetNextSegmentForOverwriteHints(IMLSegment* seg) // stores seg as the dead code elimination hint and registers this segment in seg->list_deadCodeHintBy
+ {
+ cemu_assert_debug(!deadCodeEliminationHintSeg); // a hint is expected to be set at most once
+ deadCodeEliminationHintSeg = seg;
+ if (seg) // nullptr is accepted; then nothing is registered
+ seg->list_deadCodeHintBy.push_back(this);
+ }
+
+ // instruction API
+ IMLInstruction* AppendInstruction();
+
+ bool HasSuffixInstruction() const;
+ sint32 GetSuffixInstructionIndex() const;
+ IMLInstruction* GetLastInstruction();
+
+ // segment points
+ IMLSegmentPoint* segmentPointList{};
+};
+
+
+void IMLSegment_SetLinkBranchNotTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst); // presumably the not-taken counterpart of IMLSegment_SetLinkBranchTaken - TODO confirm against the definition
+void IMLSegment_SetLinkBranchTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst); // sets src->nextSegmentBranchTaken to dst and records src in dst->list_prevSegments
+void IMLSegment_RelinkInputSegment(IMLSegment* imlSegmentOrig, IMLSegment* imlSegmentNew); // moves every incoming link of imlSegmentOrig over to imlSegmentNew
+void IMLSegment_RemoveLink(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst); // clears the src -> dst successor pointer and removes src from dst->list_prevSegments
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h b/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h
index e558292b..96b5143e 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h
+++ b/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h
@@ -21,6 +21,16 @@ public:
};
public:
+ ~PPCFunctionBoundaryTracker() // deletes every PPCRange_t referenced by map_ranges. NOTE(review): copy/move operations are not visible here; a copied tracker would delete the same ranges twice - confirm copying is disabled or unused
+ {
+ while (!map_ranges.empty())
+ {
+ PPCRange_t* range = *map_ranges.begin();
+ delete range;
+ map_ranges.erase(map_ranges.begin()); // the entry is removed right after its range is deleted, so the loop ends with an empty container
+ }
+ }
+
void trackStartPoint(MPTR startAddress)
{
processRange(startAddress, nullptr, nullptr);
@@ -40,10 +50,34 @@ public:
return false;
}
+ std::vector GetRanges() // returns a snapshot of the tracked ranges by value. NOTE(review): not declared const, unlike ContainsAddress()
+ {
+ std::vector r;
+ for (auto& it : map_ranges)
+ r.emplace_back(*it); // *it dereferences the stored pointer, so the range object is copied rather than the pointer
+ return r;
+ }
+
+ bool ContainsAddress(uint32 addr) const // true if addr lies inside any tracked range; checks the ranges one by one
+ {
+ for (auto& it : map_ranges)
+ {
+ if (addr >= it->startAddress && addr < it->getEndAddress()) // half-open test: the value returned by getEndAddress() is itself excluded
+ return true;
+ }
+ return false;
+ }
+
+ const std::set& GetBranchTargets() const // read-only view of map_branchTargetsAll (filled by addBranchDestination); returned by reference, no copy is made
+ {
+ return map_branchTargetsAll;
+ }
+
private:
void addBranchDestination(PPCRange_t* sourceRange, MPTR address)
{
- map_branchTargets.emplace(address);
+ map_queuedBranchTargets.emplace(address); // pending work list; entries are erased again later as "visited targets"
+ map_branchTargetsAll.emplace(address); // nothing visible here erases from this set; exposed through GetBranchTargets()
}
// process flow of instruction
@@ -114,7 +148,7 @@ private:
Espresso::BOField BO;
uint32 BI;
bool LK;
- Espresso::decodeOp_BCLR(opcode, BO, BI, LK);
+ Espresso::decodeOp_BCSPR(opcode, BO, BI, LK);
if (BO.branchAlways() && !LK)
{
// unconditional BLR
@@ -218,7 +252,7 @@ private:
auto rangeItr = map_ranges.begin();
PPCRange_t* previousRange = nullptr;
- for (std::set::const_iterator targetItr = map_branchTargets.begin() ; targetItr != map_branchTargets.end(); )
+ for (std::set::const_iterator targetItr = map_queuedBranchTargets.begin() ; targetItr != map_queuedBranchTargets.end(); )
{
while (rangeItr != map_ranges.end() && ((*rangeItr)->startAddress + (*rangeItr)->length) <= (*targetItr))
{
@@ -239,7 +273,7 @@ private:
(*targetItr) < ((*rangeItr)->startAddress + (*rangeItr)->length))
{
// delete visited targets
- targetItr = map_branchTargets.erase(targetItr);
+ targetItr = map_queuedBranchTargets.erase(targetItr);
continue;
}
@@ -289,5 +323,6 @@ private:
};
std::set