diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 72bbcf52..e798c1a7 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -39,7 +39,7 @@ jobs:
- name: "Install system dependencies"
run: |
sudo apt update -qq
- sudo apt install -y clang-15 cmake freeglut3-dev libgcrypt20-dev libglm-dev libgtk-3-dev libpulse-dev libsecret-1-dev libsystemd-dev libudev-dev nasm ninja-build
+ sudo apt install -y clang-15 cmake freeglut3-dev libgcrypt20-dev libglm-dev libgtk-3-dev libpulse-dev libsecret-1-dev libsystemd-dev libudev-dev nasm ninja-build libbluetooth-dev
- name: "Setup cmake"
uses: jwlawson/actions-setup-cmake@v2
@@ -96,7 +96,7 @@ jobs:
- name: "Install system dependencies"
run: |
sudo apt update -qq
- sudo apt install -y clang-15 cmake freeglut3-dev libgcrypt20-dev libglm-dev libgtk-3-dev libpulse-dev libsecret-1-dev libsystemd-dev nasm ninja-build appstream
+ sudo apt install -y clang-15 cmake freeglut3-dev libgcrypt20-dev libglm-dev libgtk-3-dev libpulse-dev libsecret-1-dev libsystemd-dev nasm ninja-build appstream libbluetooth-dev
- name: "Build AppImage"
run: |
@@ -177,6 +177,9 @@ jobs:
build-macos:
runs-on: macos-14
+ strategy:
+ matrix:
+ arch: [x86_64, arm64]
steps:
- name: "Checkout repo"
uses: actions/checkout@v4
@@ -202,7 +205,7 @@ jobs:
- name: "Install molten-vk"
run: |
- curl -L -O https://github.com/KhronosGroup/MoltenVK/releases/download/v1.2.9/MoltenVK-macos.tar
+ curl -L -O https://github.com/KhronosGroup/MoltenVK/releases/download/v1.3.0/MoltenVK-macos.tar
tar xf MoltenVK-macos.tar
sudo mkdir -p /usr/local/lib
sudo cp MoltenVK/MoltenVK/dynamic/dylib/macOS/libMoltenVK.dylib /usr/local/lib
@@ -236,7 +239,7 @@ jobs:
cd build
cmake .. ${{ env.BUILD_FLAGS }} \
-DCMAKE_BUILD_TYPE=${{ env.BUILD_MODE }} \
- -DCMAKE_OSX_ARCHITECTURES=x86_64 \
+ -DCMAKE_OSX_ARCHITECTURES=${{ matrix.arch }} \
-DMACOS_BUNDLE=ON \
-G Ninja
@@ -259,5 +262,5 @@ jobs:
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
- name: cemu-bin-macos-x64
+ name: cemu-bin-macos-${{ matrix.arch }}
path: ./bin/Cemu.dmg
diff --git a/.github/workflows/deploy_experimental_release.yml b/.github/workflows/deploy_release.yml
similarity index 98%
rename from .github/workflows/deploy_experimental_release.yml
rename to .github/workflows/deploy_release.yml
index 97e0c69e..2b9ee491 100644
--- a/.github/workflows/deploy_experimental_release.yml
+++ b/.github/workflows/deploy_release.yml
@@ -1,4 +1,4 @@
-name: Deploy experimental release
+name: Deploy release
on:
workflow_dispatch:
inputs:
@@ -54,7 +54,7 @@ jobs:
next_version_major: ${{ needs.calculate-version.outputs.next_version_major }}
next_version_minor: ${{ needs.calculate-version.outputs.next_version_minor }}
deploy:
- name: Deploy experimental release
+ name: Deploy release
runs-on: ubuntu-22.04
needs: [call-release-build, calculate-version]
steps:
diff --git a/.github/workflows/deploy_stable_release.yml b/.github/workflows/deploy_stable_release.yml
deleted file mode 100644
index fd339e7d..00000000
--- a/.github/workflows/deploy_stable_release.yml
+++ /dev/null
@@ -1,85 +0,0 @@
-name: Create new release
-on:
- workflow_dispatch:
- inputs:
- PlaceholderInput:
- description: PlaceholderInput
- required: false
-jobs:
- call-release-build:
- uses: ./.github/workflows/build.yml
- with:
- deploymode: release
- deploy:
- name: Deploy release
- runs-on: ubuntu-20.04
- needs: call-release-build
- steps:
- - uses: actions/checkout@v3
-
- - uses: actions/download-artifact@v4
- with:
- name: cemu-bin-linux-x64
- path: cemu-bin-linux-x64
-
- - uses: actions/download-artifact@v4
- with:
- name: cemu-appimage-x64
- path: cemu-appimage-x64
-
- - uses: actions/download-artifact@v4
- with:
- name: cemu-bin-windows-x64
- path: cemu-bin-windows-x64
-
- - uses: actions/download-artifact@v4
- with:
- name: cemu-bin-macos-x64
- path: cemu-bin-macos-x64
-
- - name: Initialize
- run: |
- mkdir upload
- sudo apt update -qq
- sudo apt install -y zip
-
- - name: Get Cemu release version
- run: |
- gcc -o getversion .github/getversion.cpp
- echo "Cemu CI version: $(./getversion)"
- echo "CEMU_FOLDER_NAME=Cemu_$(./getversion)" >> $GITHUB_ENV
- echo "CEMU_VERSION=$(./getversion)" >> $GITHUB_ENV
-
- - name: Create release from windows-bin
- run: |
- ls ./
- ls ./bin/
- cp -R ./bin ./${{ env.CEMU_FOLDER_NAME }}
- mv cemu-bin-windows-x64/Cemu.exe ./${{ env.CEMU_FOLDER_NAME }}/Cemu.exe
- zip -9 -r upload/cemu-${{ env.CEMU_VERSION }}-windows-x64.zip ${{ env.CEMU_FOLDER_NAME }}
- rm -r ./${{ env.CEMU_FOLDER_NAME }}
-
- - name: Create appimage
- run: |
- VERSION=${{ env.CEMU_VERSION }}
- echo "Cemu Version is $VERSION"
- ls cemu-appimage-x64
- mv cemu-appimage-x64/Cemu-*-x86_64.AppImage upload/Cemu-$VERSION-x86_64.AppImage
-
- - name: Create release from ubuntu-bin
- run: |
- ls ./
- ls ./bin/
- cp -R ./bin ./${{ env.CEMU_FOLDER_NAME }}
- mv cemu-bin-linux-x64/Cemu ./${{ env.CEMU_FOLDER_NAME }}/Cemu
- zip -9 -r upload/cemu-${{ env.CEMU_VERSION }}-ubuntu-20.04-x64.zip ${{ env.CEMU_FOLDER_NAME }}
- rm -r ./${{ env.CEMU_FOLDER_NAME }}
-
- - name: Create release from macos-bin
- run: cp cemu-bin-macos-x64/Cemu.dmg upload/cemu-${{ env.CEMU_VERSION }}-macos-12-x64.dmg
-
- - name: Create release
- run: |
- wget -O ghr.tar.gz https://github.com/tcnksm/ghr/releases/download/v0.15.0/ghr_v0.15.0_linux_amd64.tar.gz
- tar xvzf ghr.tar.gz; rm ghr.tar.gz
- ghr_v0.15.0_linux_amd64/ghr -t ${{ secrets.GITHUB_TOKEN }} -n "Cemu ${{ env.CEMU_VERSION }}" -b "Changelog:" v${{ env.CEMU_VERSION }} ./upload
diff --git a/.github/workflows/generate_pot.yml b/.github/workflows/generate_pot.yml
index 7dfa86f8..b057d441 100644
--- a/.github/workflows/generate_pot.yml
+++ b/.github/workflows/generate_pot.yml
@@ -35,7 +35,7 @@ jobs:
-o cemu.pot
- name: Upload artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
with:
name: POT file
path: ./cemu.pot
diff --git a/.gitmodules b/.gitmodules
index dc69c441..8f9772d3 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -18,3 +18,6 @@
path = dependencies/imgui
url = https://github.com/ocornut/imgui
shallow = true
+[submodule "dependencies/xbyak_aarch64"]
+ path = dependencies/xbyak_aarch64
+ url = https://github.com/fujitsu/xbyak_aarch64
diff --git a/BUILD.md b/BUILD.md
index 44d69c6c..31c26531 100644
--- a/BUILD.md
+++ b/BUILD.md
@@ -46,10 +46,10 @@ To compile Cemu, a recent enough compiler and STL with C++20 support is required
### Dependencies
#### For Arch and derivatives:
-`sudo pacman -S --needed base-devel clang cmake freeglut git glm gtk3 libgcrypt libpulse libsecret linux-headers llvm nasm ninja systemd unzip zip`
+`sudo pacman -S --needed base-devel bluez-libs clang cmake freeglut git glm gtk3 libgcrypt libpulse libsecret linux-headers llvm nasm ninja systemd unzip zip`
#### For Debian, Ubuntu and derivatives:
-`sudo apt install -y cmake curl clang-15 freeglut3-dev git libgcrypt20-dev libglm-dev libgtk-3-dev libpulse-dev libsecret-1-dev libsystemd-dev libtool nasm ninja-build`
+`sudo apt install -y cmake curl clang-15 freeglut3-dev git libbluetooth-dev libgcrypt20-dev libglm-dev libgtk-3-dev libpulse-dev libsecret-1-dev libsystemd-dev libtool nasm ninja-build`
You may also need to install `libusb-1.0-0-dev` as a workaround for an issue with the vcpkg hidapi package.
@@ -57,7 +57,7 @@ At Step 3 in [Build Cemu using cmake and clang](#build-cemu-using-cmake-and-clan
`cmake -S . -B build -DCMAKE_BUILD_TYPE=release -DCMAKE_C_COMPILER=/usr/bin/clang-15 -DCMAKE_CXX_COMPILER=/usr/bin/clang++-15 -G Ninja -DCMAKE_MAKE_PROGRAM=/usr/bin/ninja`
#### For Fedora and derivatives:
-`sudo dnf install clang cmake cubeb-devel freeglut-devel git glm-devel gtk3-devel kernel-headers libgcrypt-devel libsecret-devel libtool libusb1-devel llvm nasm ninja-build perl-core systemd-devel zlib-devel zlib-static`
+`sudo dnf install bluez-libs-devel clang cmake cubeb-devel freeglut-devel git glm-devel gtk3-devel kernel-headers libgcrypt-devel libsecret-devel libtool libusb1-devel llvm nasm ninja-build perl-core systemd-devel wayland-protocols-devel zlib-devel zlib-static`
### Build Cemu
@@ -120,6 +120,9 @@ This section refers to running `cmake -S...` (truncated).
* Compiling failed during rebuild after `git pull` with an error that mentions RPATH
* Add the following and try running the command again:
* `-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON`
+* Environment variable `VCPKG_FORCE_SYSTEM_BINARIES` must be set.
+ * Execute the following and then try running the command again:
+ * `export VCPKG_FORCE_SYSTEM_BINARIES=1`
* If you are getting a random error, read the [package-name-and-platform]-out.log and [package-name-and-platform]-err.log for the actual reason to see if you might be lacking the headers from a dependency.
@@ -189,3 +192,41 @@ Then install the dependencies:
If CMake complains about Cemu already being compiled or another similar error, try deleting the `CMakeCache.txt` file inside the `build` folder and retry building.
+## CMake configure flags
+Some flags can be passed during CMake configure to customise which features are enabled at build time.
+
+Example usage: `cmake -S . -B build -DCMAKE_BUILD_TYPE=release -DENABLE_SDL=ON -DENABLE_VULKAN=OFF`
+
+### All platforms
+| Flag               | Description                                                                  | Default | Note               |
+|--------------------|------------------------------------------------------------------------------|---------|--------------------|
+| ALLOW_PORTABLE     | Allow Cemu to use the `portable` directory to store configs and data        | ON      |                    |
+| CEMU_CXX_FLAGS     | Flags passed straight to the compiler, e.g. `-march=native`, `-Wall`, `/W3` | ""      |                    |
+| ENABLE_CUBEB       | Enable cubeb audio backend                                                   | ON      |                    |
+| ENABLE_DISCORD_RPC | Enable Discord Rich Presence support                                         | ON      |                    |
+| ENABLE_OPENGL      | Enable OpenGL graphics backend                                               | ON      | Currently required |
+| ENABLE_HIDAPI      | Enable HIDAPI (used for Wiimote controller API)                              | ON      |                    |
+| ENABLE_SDL         | Enable SDLController controller API                                          | ON      | Currently required |
+| ENABLE_VCPKG       | Use VCPKG package manager to obtain dependencies                             | ON      |                    |
+| ENABLE_VULKAN      | Enable the Vulkan graphics backend                                           | ON      |                    |
+| ENABLE_WXWIDGETS   | Enable wxWidgets UI                                                          | ON      | Currently required |
+
+### Windows
+| Flag | Description | Default | Note |
+|--------------------|-----------------------------------|---------|--------------------|
+| ENABLE_DIRECTAUDIO | Enable DirectAudio audio backend | ON | Currently required |
+| ENABLE_DIRECTINPUT | Enable DirectInput controller API | ON | Currently required |
+| ENABLE_XAUDIO | Enable XAudio audio backend | ON | |
+| ENABLE_XINPUT | Enable XInput controller API | ON | |
+
+### Linux
+| Flag | Description | Default |
+|-----------------------|----------------------------------------------------|---------|
+| ENABLE_BLUEZ | Build with Bluez (used for Wiimote controller API) | ON |
+| ENABLE_FERAL_GAMEMODE | Enable Feral Interactive GameMode support | ON |
+| ENABLE_WAYLAND | Enable Wayland support | ON |
+
+### macOS
+| Flag | Description | Default |
+|--------------|------------------------------------------------|---------|
+| MACOS_BUNDLE | macOS executable will be an application bundle | OFF |
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5b5cff6c..aa491b9e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.21.1)
option(ENABLE_VCPKG "Enable the vcpkg package manager" ON)
option(MACOS_BUNDLE "The executable when built on macOS will be created as an application bundle" OFF)
+option(ALLOW_PORTABLE "Allow Cemu to be run in portable mode" ON)
# used by CI script to set version:
set(EMULATOR_VERSION_MAJOR "0" CACHE STRING "")
@@ -98,6 +99,7 @@ endif()
if (UNIX AND NOT APPLE)
option(ENABLE_WAYLAND "Build with Wayland support" ON)
option(ENABLE_FERAL_GAMEMODE "Enables Feral Interactive GameMode Support" ON)
+ option(ENABLE_BLUEZ "Build with Bluez support" ON)
endif()
option(ENABLE_OPENGL "Enables the OpenGL backend" ON)
@@ -122,23 +124,6 @@ if (WIN32)
endif()
option(ENABLE_CUBEB "Enabled cubeb backend" ON)
-# usb hid backends
-if (WIN32)
- option(ENABLE_NSYSHID_WINDOWS_HID "Enables the native Windows HID backend for nsyshid" ON)
-endif ()
-# libusb and windows hid backends shouldn't be active at the same time; otherwise we'd see all devices twice!
-if (NOT ENABLE_NSYSHID_WINDOWS_HID)
- option(ENABLE_NSYSHID_LIBUSB "Enables the libusb backend for nsyshid" ON)
-else ()
- set(ENABLE_NSYSHID_LIBUSB OFF CACHE BOOL "" FORCE)
-endif ()
-if (ENABLE_NSYSHID_WINDOWS_HID)
- add_compile_definitions(NSYSHID_ENABLE_BACKEND_WINDOWS_HID)
-endif ()
-if (ENABLE_NSYSHID_LIBUSB)
- add_compile_definitions(NSYSHID_ENABLE_BACKEND_LIBUSB)
-endif ()
-
option(ENABLE_WXWIDGETS "Build with wxWidgets UI (Currently required)" ON)
set(THREADS_PREFER_PTHREAD_FLAG true)
@@ -179,6 +164,12 @@ if (UNIX AND NOT APPLE)
endif()
find_package(GTK3 REQUIRED)
+ if(ENABLE_BLUEZ)
+ find_package(bluez REQUIRED)
+ set(SUPPORTS_WIIMOTE ON)
+ add_compile_definitions(HAS_BLUEZ)
+ endif()
+
endif()
if (ENABLE_VULKAN)
@@ -197,7 +188,7 @@ endif()
if (ENABLE_HIDAPI)
find_package(hidapi REQUIRED)
- set(ENABLE_WIIMOTE ON)
+ set(SUPPORTS_WIIMOTE ON)
add_compile_definitions(HAS_HIDAPI)
endif ()
@@ -231,9 +222,18 @@ endif()
add_subdirectory("dependencies/ih264d" EXCLUDE_FROM_ALL)
+if (CMAKE_OSX_ARCHITECTURES)
+ set(CEMU_ARCHITECTURE ${CMAKE_OSX_ARCHITECTURES})
+else()
+ set(CEMU_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR})
+endif()
+if(CEMU_ARCHITECTURE MATCHES "(aarch64)|(AARCH64)|(arm64)|(ARM64)")
+ add_subdirectory("dependencies/xbyak_aarch64" EXCLUDE_FROM_ALL)
+endif()
+
find_package(ZArchive)
if (NOT ZArchive_FOUND)
add_subdirectory("dependencies/ZArchive" EXCLUDE_FROM_ALL)
endif()
-add_subdirectory(src)
+add_subdirectory(src)
\ No newline at end of file
diff --git a/bin/resources/ar/cemu.mo b/bin/resources/ar/cemu.mo
new file mode 100644
index 00000000..4062628b
Binary files /dev/null and b/bin/resources/ar/cemu.mo differ
diff --git a/bin/resources/de/cemu.mo b/bin/resources/de/cemu.mo
index 8dc4e8cc..cd9edd3c 100644
Binary files a/bin/resources/de/cemu.mo and b/bin/resources/de/cemu.mo differ
diff --git a/bin/resources/ru/cemu.mo b/bin/resources/ru/cemu.mo
index 4ff04e2b..eb8f372f 100644
Binary files a/bin/resources/ru/cemu.mo and b/bin/resources/ru/cemu.mo differ
diff --git a/bin/resources/sv/cemu.mo b/bin/resources/sv/cemu.mo
index 3e850b36..c8fd68ee 100644
Binary files a/bin/resources/sv/cemu.mo and b/bin/resources/sv/cemu.mo differ
diff --git a/boost.natvis b/boost.natvis
new file mode 100644
index 00000000..2781a585
--- /dev/null
+++ b/boost.natvis
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="utf-8"?>
+<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
+
+  <Type Name="boost::container::vector&lt;*&gt;">
+    <Expand>
+      <Item Name="[size]">m_holder.m_size</Item>
+      <ArrayItems>
+        <Size>m_holder.m_size</Size>
+        <ValuePointer>m_holder.m_start</ValuePointer>
+      </ArrayItems>
+    </Expand>
+  </Type>
+
+  <Type Name="boost::container::static_vector&lt;*&gt;">
+    <DisplayString>{{ size={m_holder.m_size} }}</DisplayString>
+    <Expand>
+      <Item Name="[size]">m_holder.m_size</Item>
+      <Item Name="[capacity]">static_capacity</Item>
+      <ArrayItems>
+        <Size>m_holder.m_size</Size>
+        <ValuePointer>($T1*)m_holder.storage.data</ValuePointer>
+      </ArrayItems>
+    </Expand>
+  </Type>
+
+</AutoVisualizer>
diff --git a/cmake/Findbluez.cmake b/cmake/Findbluez.cmake
new file mode 100644
index 00000000..007cdac9
--- /dev/null
+++ b/cmake/Findbluez.cmake
@@ -0,0 +1,20 @@
+# SPDX-FileCopyrightText: 2022 Andrea Pappacoda
+# SPDX-License-Identifier: ISC
+
+find_package(bluez CONFIG)
+if (NOT bluez_FOUND)
+ find_package(PkgConfig)
+ if (PKG_CONFIG_FOUND)
+ pkg_search_module(bluez IMPORTED_TARGET GLOBAL bluez-1.0 bluez)
+ if (bluez_FOUND)
+ add_library(bluez::bluez ALIAS PkgConfig::bluez)
+ endif ()
+ endif ()
+endif ()
+
+find_package_handle_standard_args(bluez
+ REQUIRED_VARS
+ bluez_LINK_LIBRARIES
+ bluez_FOUND
+ VERSION_VAR bluez_VERSION
+)
diff --git a/dependencies/ih264d/CMakeLists.txt b/dependencies/ih264d/CMakeLists.txt
index 686a9d08..64ac0931 100644
--- a/dependencies/ih264d/CMakeLists.txt
+++ b/dependencies/ih264d/CMakeLists.txt
@@ -183,6 +183,9 @@ target_sources(ih264d PRIVATE
"decoder/arm/ih264d_function_selector.c"
)
target_compile_options(ih264d PRIVATE -DARMV8)
+if(APPLE)
+ target_sources(ih264d PRIVATE "common/armv8/macos_arm_symbol_aliases.s")
+endif()
else()
message(FATAL_ERROR "ih264d unknown architecture: ${IH264D_ARCHITECTURE}")
endif()
diff --git a/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s b/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s
index 39c02560..c0d9cf99 100644
--- a/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s
+++ b/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s
@@ -429,8 +429,13 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8:
rev64 v7.4h, v2.4h
ld1 {v3.2s}, [x10]
sub x5, x3, #8
+#ifdef __APPLE__
+ adrp x12, _ih264_gai1_intrapred_chroma_plane_coeffs1@GOTPAGE
+ ldr x12, [x12, _ih264_gai1_intrapred_chroma_plane_coeffs1@GOTPAGEOFF]
+#else
adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs1
ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs1]
+#endif
usubl v10.8h, v5.8b, v1.8b
ld1 {v8.8b, v9.8b}, [x12] // Load multiplication factors 1 to 8 into D3
mov v8.d[1], v9.d[0]
@@ -484,10 +489,13 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8:
zip1 v1.8h, v0.8h, v2.8h
zip2 v2.8h, v0.8h, v2.8h
mov v0.16b, v1.16b
-
+#ifdef __APPLE__
+ adrp x12, _ih264_gai1_intrapred_chroma_plane_coeffs2@GOTPAGE
+ ldr x12, [x12, _ih264_gai1_intrapred_chroma_plane_coeffs2@GOTPAGEOFF]
+#else
adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs2
ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs2]
-
+#endif
ld1 {v8.2s, v9.2s}, [x12]
mov v8.d[1], v9.d[0]
mov v10.16b, v8.16b
diff --git a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s
index fa19c121..2422d8cd 100644
--- a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s
+++ b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s
@@ -431,10 +431,13 @@ ih264_intra_pred_luma_16x16_mode_plane_av8:
mov x10, x1 //top_left
mov x4, #-1
ld1 {v2.2s}, [x1], x8
-
+#ifdef __APPLE__
+ adrp x7, _ih264_gai1_intrapred_luma_plane_coeffs@GOTPAGE
+ ldr x7, [x7, _ih264_gai1_intrapred_luma_plane_coeffs@GOTPAGEOFF]
+#else
adrp x7, :got:ih264_gai1_intrapred_luma_plane_coeffs
ldr x7, [x7, #:got_lo12:ih264_gai1_intrapred_luma_plane_coeffs]
-
+#endif
ld1 {v0.2s}, [x1]
rev64 v2.8b, v2.8b
ld1 {v6.2s, v7.2s}, [x7]
diff --git a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s
index 273aa81b..6fa31ded 100644
--- a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s
+++ b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s
@@ -1029,9 +1029,13 @@ ih264_intra_pred_luma_8x8_mode_horz_u_av8:
mov v3.d[0], v2.d[1]
ext v4.16b, v2.16b , v2.16b , #1
mov v5.d[0], v4.d[1]
-
+#ifdef __APPLE__
+ adrp x12, _ih264_gai1_intrapred_luma_8x8_horz_u@GOTPAGE
+ ldr x12, [x12, _ih264_gai1_intrapred_luma_8x8_horz_u@GOTPAGEOFF]
+#else
adrp x12, :got:ih264_gai1_intrapred_luma_8x8_horz_u
ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_luma_8x8_horz_u]
+#endif
uaddl v20.8h, v0.8b, v2.8b
uaddl v22.8h, v1.8b, v3.8b
uaddl v24.8h, v2.8b, v4.8b
diff --git a/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s b/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s
index 475f690e..8d6aa995 100644
--- a/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s
+++ b/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s
@@ -142,14 +142,22 @@ ih264_weighted_bi_pred_luma_av8:
sxtw x4, w4
sxtw x5, w5
stp x19, x20, [sp, #-16]!
+#ifndef __APPLE__
ldr w8, [sp, #80] //Load wt2 in w8
ldr w9, [sp, #88] //Load ofst1 in w9
- add w6, w6, #1 //w6 = log_WD + 1
- neg w10, w6 //w10 = -(log_WD + 1)
- dup v0.8h, w10 //Q0 = -(log_WD + 1) (32-bit)
ldr w10, [sp, #96] //Load ofst2 in w10
ldr w11, [sp, #104] //Load ht in w11
ldr w12, [sp, #112] //Load wd in w12
+#else
+ ldr w8, [sp, #80] //Load wt2 in w8
+ ldr w9, [sp, #84] //Load ofst1 in w9
+ ldr w10, [sp, #88] //Load ofst2 in w10
+ ldr w11, [sp, #92] //Load ht in w11
+ ldr w12, [sp, #96] //Load wd in w12
+#endif
+ add w6, w6, #1 //w6 = log_WD + 1
+ neg w10, w6 //w10 = -(log_WD + 1)
+ dup v0.8h, w10 //Q0 = -(log_WD + 1) (32-bit)
add w9, w9, #1 //w9 = ofst1 + 1
add w9, w9, w10 //w9 = ofst1 + ofst2 + 1
mov v2.s[0], w7
@@ -424,17 +432,24 @@ ih264_weighted_bi_pred_chroma_av8:
sxtw x5, w5
stp x19, x20, [sp, #-16]!
-
+#ifndef __APPLE__
ldr w8, [sp, #80] //Load wt2 in w8
+ ldr w9, [sp, #88] //Load ofst1 in w9
+ ldr w10, [sp, #96] //Load ofst2 in w10
+ ldr w11, [sp, #104] //Load ht in w11
+ ldr w12, [sp, #112] //Load wd in w12
+#else
+ ldr w8, [sp, #80] //Load wt2 in w8
+ ldr w9, [sp, #84] //Load ofst1 in w9
+ ldr w10, [sp, #88] //Load ofst2 in w10
+ ldr w11, [sp, #92] //Load ht in w11
+ ldr w12, [sp, #96] //Load wd in w12
+#endif
dup v4.4s, w8 //Q2 = (wt2_u, wt2_v) (32-bit)
dup v2.4s, w7 //Q1 = (wt1_u, wt1_v) (32-bit)
add w6, w6, #1 //w6 = log_WD + 1
- ldr w9, [sp, #88] //Load ofst1 in w9
- ldr w10, [sp, #96] //Load ofst2 in w10
neg w20, w6 //w20 = -(log_WD + 1)
dup v0.8h, w20 //Q0 = -(log_WD + 1) (16-bit)
- ldr w11, [sp, #104] //Load ht in x11
- ldr w12, [sp, #112] //Load wd in x12
dup v20.8h, w9 //0ffset1
dup v21.8h, w10 //0ffset2
srhadd v6.8b, v20.8b, v21.8b
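
Note on the `#ifdef __APPLE__` branches above: Apple's AArch64 ABI packs stack-passed arguments to their natural alignment instead of the fixed 8-byte slots used by standard AAPCS64, so the 32-bit arguments that spill past registers x0–x7 land 4 bytes apart on macOS. A minimal sketch of the effect, using a hypothetical prototype rather than the actual ih264d signatures:

```cpp
// Hypothetical function with more 32-bit integer arguments than fit in x0-x7.
extern "C" void take_many(int a0, int a1, int a2, int a3,
                          int a4, int a5, int a6, int a7,
                          int s0, int s1, int s2);

// Linux / AAPCS64: every stack argument occupies its own 8-byte slot:
//   s0 at [sp, #0], s1 at [sp, #8], s2 at [sp, #16]
// Apple arm64:     32-bit stack arguments are packed to 4-byte alignment:
//   s0 at [sp, #0], s1 at [sp, #4], s2 at [sp, #8]
// This is why the macOS branch above loads wt2/ofst1/ofst2/ht/wd from tighter
// stack offsets (80, 84, 88, 92, 96) than the ELF branch (80, 88, 96, 104, 112).
```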
diff --git a/dependencies/ih264d/common/armv8/macos_arm_symbol_aliases.s b/dependencies/ih264d/common/armv8/macos_arm_symbol_aliases.s
new file mode 100644
index 00000000..3639f1b3
--- /dev/null
+++ b/dependencies/ih264d/common/armv8/macos_arm_symbol_aliases.s
@@ -0,0 +1,185 @@
+// macOS clang compilers prepend an underscore to C function names. These aliases prevent
+// mismatches between the hand-written assembly symbol names and the C functions as declared in the headers.
+
+.global _ih264_deblk_chroma_horz_bs4_av8
+_ih264_deblk_chroma_horz_bs4_av8 = ih264_deblk_chroma_horz_bs4_av8
+
+.global _ih264_deblk_chroma_horz_bslt4_av8
+_ih264_deblk_chroma_horz_bslt4_av8 = ih264_deblk_chroma_horz_bslt4_av8
+
+.global _ih264_deblk_chroma_vert_bs4_av8
+_ih264_deblk_chroma_vert_bs4_av8 = ih264_deblk_chroma_vert_bs4_av8
+
+.global _ih264_deblk_chroma_vert_bslt4_av8
+_ih264_deblk_chroma_vert_bslt4_av8 = ih264_deblk_chroma_vert_bslt4_av8
+
+.global _ih264_deblk_luma_horz_bs4_av8
+_ih264_deblk_luma_horz_bs4_av8 = ih264_deblk_luma_horz_bs4_av8
+
+.global _ih264_deblk_luma_horz_bslt4_av8
+_ih264_deblk_luma_horz_bslt4_av8 = ih264_deblk_luma_horz_bslt4_av8
+
+.global _ih264_deblk_luma_vert_bs4_av8
+_ih264_deblk_luma_vert_bs4_av8 = ih264_deblk_luma_vert_bs4_av8
+
+.global _ih264_deblk_luma_vert_bslt4_av8
+_ih264_deblk_luma_vert_bslt4_av8 = ih264_deblk_luma_vert_bslt4_av8
+
+.global _ih264_default_weighted_pred_chroma_av8
+_ih264_default_weighted_pred_chroma_av8 = ih264_default_weighted_pred_chroma_av8
+
+.global _ih264_default_weighted_pred_luma_av8
+_ih264_default_weighted_pred_luma_av8 = ih264_default_weighted_pred_luma_av8
+
+.global _ih264_ihadamard_scaling_4x4_av8
+_ih264_ihadamard_scaling_4x4_av8 = ih264_ihadamard_scaling_4x4_av8
+
+.global _ih264_inter_pred_chroma_av8
+_ih264_inter_pred_chroma_av8 = ih264_inter_pred_chroma_av8
+
+.global _ih264_inter_pred_luma_copy_av8
+_ih264_inter_pred_luma_copy_av8 = ih264_inter_pred_luma_copy_av8
+
+.global _ih264_inter_pred_luma_horz_av8
+_ih264_inter_pred_luma_horz_av8 = ih264_inter_pred_luma_horz_av8
+
+.global _ih264_inter_pred_luma_horz_hpel_vert_hpel_av8
+_ih264_inter_pred_luma_horz_hpel_vert_hpel_av8 = ih264_inter_pred_luma_horz_hpel_vert_hpel_av8
+
+.global _ih264_inter_pred_luma_horz_hpel_vert_qpel_av8
+_ih264_inter_pred_luma_horz_hpel_vert_qpel_av8 = ih264_inter_pred_luma_horz_hpel_vert_qpel_av8
+
+.global _ih264_inter_pred_luma_horz_qpel_av8
+_ih264_inter_pred_luma_horz_qpel_av8 = ih264_inter_pred_luma_horz_qpel_av8
+
+.global _ih264_inter_pred_luma_horz_qpel_vert_hpel_av8
+_ih264_inter_pred_luma_horz_qpel_vert_hpel_av8 = ih264_inter_pred_luma_horz_qpel_vert_hpel_av8
+
+.global _ih264_inter_pred_luma_horz_qpel_vert_qpel_av8
+_ih264_inter_pred_luma_horz_qpel_vert_qpel_av8 = ih264_inter_pred_luma_horz_qpel_vert_qpel_av8
+
+.global _ih264_inter_pred_luma_vert_av8
+_ih264_inter_pred_luma_vert_av8 = ih264_inter_pred_luma_vert_av8
+
+.global _ih264_inter_pred_luma_vert_qpel_av8
+_ih264_inter_pred_luma_vert_qpel_av8 = ih264_inter_pred_luma_vert_qpel_av8
+
+.global _ih264_intra_pred_chroma_8x8_mode_horz_av8
+_ih264_intra_pred_chroma_8x8_mode_horz_av8 = ih264_intra_pred_chroma_8x8_mode_horz_av8
+
+.global _ih264_intra_pred_chroma_8x8_mode_plane_av8
+_ih264_intra_pred_chroma_8x8_mode_plane_av8 = ih264_intra_pred_chroma_8x8_mode_plane_av8
+
+.global _ih264_intra_pred_chroma_8x8_mode_vert_av8
+_ih264_intra_pred_chroma_8x8_mode_vert_av8 = ih264_intra_pred_chroma_8x8_mode_vert_av8
+
+.global _ih264_intra_pred_luma_16x16_mode_dc_av8
+_ih264_intra_pred_luma_16x16_mode_dc_av8 = ih264_intra_pred_luma_16x16_mode_dc_av8
+
+.global _ih264_intra_pred_luma_16x16_mode_horz_av8
+_ih264_intra_pred_luma_16x16_mode_horz_av8 = ih264_intra_pred_luma_16x16_mode_horz_av8
+
+.global _ih264_intra_pred_luma_16x16_mode_plane_av8
+_ih264_intra_pred_luma_16x16_mode_plane_av8 = ih264_intra_pred_luma_16x16_mode_plane_av8
+
+.global _ih264_intra_pred_luma_16x16_mode_vert_av8
+_ih264_intra_pred_luma_16x16_mode_vert_av8 = ih264_intra_pred_luma_16x16_mode_vert_av8
+
+.global _ih264_intra_pred_luma_4x4_mode_dc_av8
+_ih264_intra_pred_luma_4x4_mode_dc_av8 = ih264_intra_pred_luma_4x4_mode_dc_av8
+
+.global _ih264_intra_pred_luma_4x4_mode_diag_dl_av8
+_ih264_intra_pred_luma_4x4_mode_diag_dl_av8 = ih264_intra_pred_luma_4x4_mode_diag_dl_av8
+
+.global _ih264_intra_pred_luma_4x4_mode_diag_dr_av8
+_ih264_intra_pred_luma_4x4_mode_diag_dr_av8 = ih264_intra_pred_luma_4x4_mode_diag_dr_av8
+
+.global _ih264_intra_pred_luma_4x4_mode_horz_av8
+_ih264_intra_pred_luma_4x4_mode_horz_av8 = ih264_intra_pred_luma_4x4_mode_horz_av8
+
+.global _ih264_intra_pred_luma_4x4_mode_horz_d_av8
+_ih264_intra_pred_luma_4x4_mode_horz_d_av8 = ih264_intra_pred_luma_4x4_mode_horz_d_av8
+
+.global _ih264_intra_pred_luma_4x4_mode_horz_u_av8
+_ih264_intra_pred_luma_4x4_mode_horz_u_av8 = ih264_intra_pred_luma_4x4_mode_horz_u_av8
+
+.global _ih264_intra_pred_luma_4x4_mode_vert_av8
+_ih264_intra_pred_luma_4x4_mode_vert_av8 = ih264_intra_pred_luma_4x4_mode_vert_av8
+
+.global _ih264_intra_pred_luma_4x4_mode_vert_l_av8
+_ih264_intra_pred_luma_4x4_mode_vert_l_av8 = ih264_intra_pred_luma_4x4_mode_vert_l_av8
+
+.global _ih264_intra_pred_luma_4x4_mode_vert_r_av8
+_ih264_intra_pred_luma_4x4_mode_vert_r_av8 = ih264_intra_pred_luma_4x4_mode_vert_r_av8
+
+.global _ih264_intra_pred_luma_8x8_mode_dc_av8
+_ih264_intra_pred_luma_8x8_mode_dc_av8 = ih264_intra_pred_luma_8x8_mode_dc_av8
+
+.global _ih264_intra_pred_luma_8x8_mode_diag_dl_av8
+_ih264_intra_pred_luma_8x8_mode_diag_dl_av8 = ih264_intra_pred_luma_8x8_mode_diag_dl_av8
+
+.global _ih264_intra_pred_luma_8x8_mode_diag_dr_av8
+_ih264_intra_pred_luma_8x8_mode_diag_dr_av8 = ih264_intra_pred_luma_8x8_mode_diag_dr_av8
+
+.global _ih264_intra_pred_luma_8x8_mode_horz_av8
+_ih264_intra_pred_luma_8x8_mode_horz_av8 = ih264_intra_pred_luma_8x8_mode_horz_av8
+
+.global _ih264_intra_pred_luma_8x8_mode_horz_d_av8
+_ih264_intra_pred_luma_8x8_mode_horz_d_av8 = ih264_intra_pred_luma_8x8_mode_horz_d_av8
+
+.global _ih264_intra_pred_luma_8x8_mode_horz_u_av8
+_ih264_intra_pred_luma_8x8_mode_horz_u_av8 = ih264_intra_pred_luma_8x8_mode_horz_u_av8
+
+.global _ih264_intra_pred_luma_8x8_mode_vert_av8
+_ih264_intra_pred_luma_8x8_mode_vert_av8 = ih264_intra_pred_luma_8x8_mode_vert_av8
+
+.global _ih264_intra_pred_luma_8x8_mode_vert_l_av8
+_ih264_intra_pred_luma_8x8_mode_vert_l_av8 = ih264_intra_pred_luma_8x8_mode_vert_l_av8
+
+.global _ih264_intra_pred_luma_8x8_mode_vert_r_av8
+_ih264_intra_pred_luma_8x8_mode_vert_r_av8 = ih264_intra_pred_luma_8x8_mode_vert_r_av8
+
+.global _ih264_iquant_itrans_recon_4x4_av8
+_ih264_iquant_itrans_recon_4x4_av8 = ih264_iquant_itrans_recon_4x4_av8
+
+.global _ih264_iquant_itrans_recon_4x4_dc_av8
+_ih264_iquant_itrans_recon_4x4_dc_av8 = ih264_iquant_itrans_recon_4x4_dc_av8
+
+.global _ih264_iquant_itrans_recon_8x8_av8
+_ih264_iquant_itrans_recon_8x8_av8 = ih264_iquant_itrans_recon_8x8_av8
+
+.global _ih264_iquant_itrans_recon_8x8_dc_av8
+_ih264_iquant_itrans_recon_8x8_dc_av8 = ih264_iquant_itrans_recon_8x8_dc_av8
+
+.global _ih264_iquant_itrans_recon_chroma_4x4_av8
+_ih264_iquant_itrans_recon_chroma_4x4_av8 = ih264_iquant_itrans_recon_chroma_4x4_av8
+
+.global _ih264_iquant_itrans_recon_chroma_4x4_dc_av8
+_ih264_iquant_itrans_recon_chroma_4x4_dc_av8 = ih264_iquant_itrans_recon_chroma_4x4_dc_av8
+
+.global _ih264_pad_left_chroma_av8
+_ih264_pad_left_chroma_av8 = ih264_pad_left_chroma_av8
+
+.global _ih264_pad_left_luma_av8
+_ih264_pad_left_luma_av8 = ih264_pad_left_luma_av8
+
+.global _ih264_pad_right_chroma_av8
+_ih264_pad_right_chroma_av8 = ih264_pad_right_chroma_av8
+
+.global _ih264_pad_right_luma_av8
+_ih264_pad_right_luma_av8 = ih264_pad_right_luma_av8
+
+.global _ih264_pad_top_av8
+_ih264_pad_top_av8 = ih264_pad_top_av8
+
+.global _ih264_weighted_bi_pred_chroma_av8
+_ih264_weighted_bi_pred_chroma_av8 = ih264_weighted_bi_pred_chroma_av8
+
+.global _ih264_weighted_bi_pred_luma_av8
+_ih264_weighted_bi_pred_luma_av8 = ih264_weighted_bi_pred_luma_av8
+
+.global _ih264_weighted_pred_chroma_av8
+_ih264_weighted_pred_chroma_av8 = ih264_weighted_pred_chroma_av8
+
+.global _ih264_weighted_pred_luma_av8
+_ih264_weighted_pred_luma_av8 = ih264_weighted_pred_luma_av8
\ No newline at end of file
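
To make the purpose of the alias file concrete, here is a hedged sketch of the mismatch it resolves; the declaration below is illustrative and not copied from the ih264d headers:

```cpp
// On Mach-O targets, clang emits a leading underscore for C symbols, so this
// declaration makes the object file reference "_ih264_pad_top_av8":
extern "C" void ih264_pad_top_av8(/* arguments omitted */);

// The hand-written assembly defines the label without the underscore
// ("ih264_pad_top_av8"), so linking on macOS would fail with an undefined
// "_ih264_pad_top_av8". The alias file simply binds "_name = name" for every
// exported ih264d assembly function.
```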
diff --git a/dependencies/vcpkg b/dependencies/vcpkg
index a4275b7e..533a5fda 160000
--- a/dependencies/vcpkg
+++ b/dependencies/vcpkg
@@ -1 +1 @@
-Subproject commit a4275b7eee79fb24ec2e135481ef5fce8b41c339
+Subproject commit 533a5fda5c0646d1771345fb572e759283444d5f
diff --git a/dependencies/xbyak_aarch64 b/dependencies/xbyak_aarch64
new file mode 160000
index 00000000..904b8923
--- /dev/null
+++ b/dependencies/xbyak_aarch64
@@ -0,0 +1 @@
+Subproject commit 904b8923457f3ec0d6f82ea2d6832a792851194d
diff --git a/dist/linux/info.cemu.Cemu.desktop b/dist/linux/info.cemu.Cemu.desktop
index 5003d4a6..6eeb0120 100644
--- a/dist/linux/info.cemu.Cemu.desktop
+++ b/dist/linux/info.cemu.Cemu.desktop
@@ -24,3 +24,4 @@ Comment[it]=Software per emulare giochi e applicazioni per Wii U su PC
Categories=Game;Emulator;
Keywords=Nintendo;
MimeType=application/x-wii-u-rom;
+StartupWMClass=Cemu
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 7d64d91b..04b6dfdd 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -49,7 +49,6 @@ add_subdirectory(audio)
add_subdirectory(util)
add_subdirectory(imgui)
add_subdirectory(resource)
-add_subdirectory(asm)
add_executable(CemuBin
main.cpp
@@ -82,8 +81,8 @@ if (MACOS_BUNDLE)
set(MACOSX_BUNDLE_ICON_FILE "cemu.icns")
set(MACOSX_BUNDLE_GUI_IDENTIFIER "info.cemu.Cemu")
set(MACOSX_BUNDLE_BUNDLE_NAME "Cemu")
- set(MACOSX_BUNDLE_SHORT_VERSION_STRING ${CMAKE_PROJECT_VERSION})
- set(MACOSX_BUNDLE_BUNDLE_VERSION ${CMAKE_PROJECT_VERSION})
+ set(MACOSX_BUNDLE_SHORT_VERSION_STRING "${EMULATOR_VERSION_MAJOR}.${EMULATOR_VERSION_MINOR}.${EMULATOR_VERSION_PATCH}")
+ set(MACOSX_BUNDLE_BUNDLE_VERSION "${EMULATOR_VERSION_MAJOR}.${EMULATOR_VERSION_MINOR}.${EMULATOR_VERSION_PATCH}")
set(MACOSX_BUNDLE_COPYRIGHT "Copyright © 2024 Cemu Project")
set(MACOSX_BUNDLE_CATEGORY "public.app-category.games")
@@ -101,12 +100,26 @@ if (MACOS_BUNDLE)
COMMAND ${CMAKE_COMMAND} ARGS -E copy_directory "${CMAKE_SOURCE_DIR}/bin/${folder}" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/SharedSupport/${folder}")
endforeach(folder)
+ if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+ set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/debug/lib/libusb-1.0.0.dylib")
+ else()
+ set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib/libusb-1.0.0.dylib")
+ endif()
+
+ if (EXISTS "/usr/local/lib/libMoltenVK.dylib")
+ set(MOLTENVK_PATH "/usr/local/lib/libMoltenVK.dylib")
+ elseif (EXISTS "/opt/homebrew/lib/libMoltenVK.dylib")
+ set(MOLTENVK_PATH "/opt/homebrew/lib/libMoltenVK.dylib")
+ else()
+ message(FATAL_ERROR "failed to find libMoltenVK.dylib")
+ endif ()
+
add_custom_command (TARGET CemuBin POST_BUILD
- COMMAND ${CMAKE_COMMAND} ARGS -E copy "/usr/local/lib/libMoltenVK.dylib" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libMoltenVK.dylib"
- COMMAND ${CMAKE_COMMAND} ARGS -E copy "${CMAKE_BINARY_DIR}/vcpkg_installed/x64-osx/lib/libusb-1.0.0.dylib" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libusb-1.0.0.dylib"
+ COMMAND ${CMAKE_COMMAND} ARGS -E copy "${MOLTENVK_PATH}" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libMoltenVK.dylib"
+ COMMAND ${CMAKE_COMMAND} ARGS -E copy "${LIBUSB_PATH}" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libusb-1.0.0.dylib"
COMMAND ${CMAKE_COMMAND} ARGS -E copy "${CMAKE_SOURCE_DIR}/src/resource/update.sh" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/MacOS/update.sh"
COMMAND bash -c "install_name_tool -add_rpath @executable_path/../Frameworks ${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/MacOS/${OUTPUT_NAME}"
- COMMAND bash -c "install_name_tool -change /Users/runner/work/Cemu/Cemu/build/vcpkg_installed/x64-osx/lib/libusb-1.0.0.dylib @executable_path/../Frameworks/libusb-1.0.0.dylib ${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/MacOS/${OUTPUT_NAME}")
+ COMMAND bash -c "install_name_tool -change ${LIBUSB_PATH} @executable_path/../Frameworks/libusb-1.0.0.dylib ${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/MacOS/${OUTPUT_NAME}")
endif()
set_target_properties(CemuBin PROPERTIES
diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt
index 91d257b2..2900059b 100644
--- a/src/Cafe/CMakeLists.txt
+++ b/src/Cafe/CMakeLists.txt
@@ -67,24 +67,31 @@ add_library(CemuCafe
HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h
HW/Espresso/Recompiler/PPCRecompiler.cpp
HW/Espresso/Recompiler/PPCRecompiler.h
- HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp
+ HW/Espresso/Recompiler/IML/IML.h
+ HW/Espresso/Recompiler/IML/IMLSegment.cpp
+ HW/Espresso/Recompiler/IML/IMLSegment.h
+ HW/Espresso/Recompiler/IML/IMLInstruction.cpp
+ HW/Espresso/Recompiler/IML/IMLInstruction.h
+ HW/Espresso/Recompiler/IML/IMLDebug.cpp
+ HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp
+ HW/Espresso/Recompiler/IML/IMLOptimizer.cpp
+ HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp
+ HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h
+ HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp
+ HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h
HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp
HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp
HW/Espresso/Recompiler/PPCRecompilerIml.h
- HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp
- HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp
- HW/Espresso/Recompiler/PPCRecompilerImlRanges.h
- HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator2.cpp
- HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp
HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp
- HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp
- HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp
- HW/Espresso/Recompiler/PPCRecompilerX64.cpp
- HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp
- HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp
- HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp
- HW/Espresso/Recompiler/PPCRecompilerX64.h
- HW/Espresso/Recompiler/x64Emit.hpp
+ HW/Espresso/Recompiler/BackendX64/BackendX64AVX.cpp
+ HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp
+ HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
+ HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp
+ HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp
+ HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp
+ HW/Espresso/Recompiler/BackendX64/BackendX64.h
+ HW/Espresso/Recompiler/BackendX64/X64Emit.hpp
+ HW/Espresso/Recompiler/BackendX64/x86Emitter.h
HW/Latte/Common/RegisterSerializer.cpp
HW/Latte/Common/RegisterSerializer.h
HW/Latte/Common/ShaderSerializer.cpp
@@ -463,12 +470,16 @@ add_library(CemuCafe
OS/libs/nsyshid/BackendEmulated.h
OS/libs/nsyshid/BackendLibusb.cpp
OS/libs/nsyshid/BackendLibusb.h
- OS/libs/nsyshid/BackendWindowsHID.cpp
- OS/libs/nsyshid/BackendWindowsHID.h
+ OS/libs/nsyshid/Dimensions.cpp
+ OS/libs/nsyshid/Dimensions.h
OS/libs/nsyshid/Infinity.cpp
OS/libs/nsyshid/Infinity.h
OS/libs/nsyshid/Skylander.cpp
OS/libs/nsyshid/Skylander.h
+ OS/libs/nsyshid/SkylanderXbox360.cpp
+ OS/libs/nsyshid/SkylanderXbox360.h
+ OS/libs/nsyshid/g721/g721.cpp
+ OS/libs/nsyshid/g721/g721.h
OS/libs/nsyskbd/nsyskbd.cpp
OS/libs/nsyskbd/nsyskbd.h
OS/libs/nsysnet/nsysnet.cpp
@@ -526,12 +537,25 @@ if(APPLE)
target_sources(CemuCafe PRIVATE "HW/Latte/Renderer/Vulkan/CocoaSurface.mm")
endif()
+if(CEMU_ARCHITECTURE MATCHES "(aarch64)|(AARCH64)|(arm64)|(ARM64)")
+ target_sources(CemuCafe PRIVATE
+ HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp
+ HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h
+ )
+ target_link_libraries(CemuCafe PRIVATE xbyak_aarch64)
+endif()
+
set_property(TARGET CemuCafe PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
target_include_directories(CemuCafe PUBLIC "../")
+if (glslang_VERSION VERSION_LESS "15.0.0")
+ set(glslang_target "glslang::SPIRV")
+else()
+ set(glslang_target "glslang::glslang")
+endif()
+
target_link_libraries(CemuCafe PRIVATE
- CemuAsm
CemuAudio
CemuCommon
CemuComponents
@@ -545,7 +569,7 @@ target_link_libraries(CemuCafe PRIVATE
Boost::nowide
CURL::libcurl
fmt::fmt
- glslang::SPIRV
+ ${glslang_target}
ih264d
OpenSSL::Crypto
OpenSSL::SSL
@@ -561,15 +585,16 @@ if (ENABLE_WAYLAND)
target_link_libraries(CemuCafe PUBLIC Wayland::Client)
endif()
-if (ENABLE_NSYSHID_LIBUSB)
- if (ENABLE_VCPKG)
- find_package(PkgConfig REQUIRED)
- pkg_check_modules(libusb REQUIRED IMPORTED_TARGET libusb-1.0)
- target_link_libraries(CemuCafe PRIVATE PkgConfig::libusb)
- else ()
- find_package(libusb MODULE REQUIRED)
- target_link_libraries(CemuCafe PRIVATE libusb::libusb)
- endif ()
+if (ENABLE_VCPKG)
+ if(WIN32)
+ set(PKG_CONFIG_EXECUTABLE "${VCPKG_INSTALLED_DIR}/x64-windows/tools/pkgconf/pkgconf.exe")
+ endif()
+ find_package(PkgConfig REQUIRED)
+ pkg_check_modules(libusb REQUIRED IMPORTED_TARGET libusb-1.0)
+ target_link_libraries(CemuCafe PRIVATE PkgConfig::libusb)
+else ()
+ find_package(libusb MODULE REQUIRED)
+ target_link_libraries(CemuCafe PRIVATE libusb::libusb)
endif ()
if (ENABLE_WXWIDGETS)
diff --git a/src/Cafe/CafeSystem.cpp b/src/Cafe/CafeSystem.cpp
index 51de3550..d20ccd9d 100644
--- a/src/Cafe/CafeSystem.cpp
+++ b/src/Cafe/CafeSystem.cpp
@@ -9,6 +9,7 @@
#include "audio/IAudioAPI.h"
#include "audio/IAudioInputAPI.h"
#include "config/ActiveSettings.h"
+#include "config/LaunchSettings.h"
#include "Cafe/TitleList/GameInfo.h"
#include "Cafe/GraphicPack/GraphicPack2.h"
#include "util/helpers/SystemException.h"
@@ -637,40 +638,40 @@ namespace CafeSystem
fsc_unmount("/cemuBossStorage/", FSC_PRIORITY_BASE);
}
- STATUS_CODE LoadAndMountForegroundTitle(TitleId titleId)
+ PREPARE_STATUS_CODE LoadAndMountForegroundTitle(TitleId titleId)
{
cemuLog_log(LogType::Force, "Mounting title {:016x}", (uint64)titleId);
sGameInfo_ForegroundTitle = CafeTitleList::GetGameInfo(titleId);
if (!sGameInfo_ForegroundTitle.IsValid())
{
cemuLog_log(LogType::Force, "Mounting failed: Game meta information is either missing, inaccessible or not valid (missing or invalid .xml files in code and meta folder)");
- return STATUS_CODE::UNABLE_TO_MOUNT;
+ return PREPARE_STATUS_CODE::UNABLE_TO_MOUNT;
}
// check base
TitleInfo& titleBase = sGameInfo_ForegroundTitle.GetBase();
if (!titleBase.IsValid())
- return STATUS_CODE::UNABLE_TO_MOUNT;
+ return PREPARE_STATUS_CODE::UNABLE_TO_MOUNT;
if(!titleBase.ParseXmlInfo())
- return STATUS_CODE::UNABLE_TO_MOUNT;
+ return PREPARE_STATUS_CODE::UNABLE_TO_MOUNT;
cemuLog_log(LogType::Force, "Base: {}", titleBase.GetPrintPath());
// mount base
if (!titleBase.Mount("/vol/content", "content", FSC_PRIORITY_BASE) || !titleBase.Mount(GetInternalVirtualCodeFolder(), "code", FSC_PRIORITY_BASE))
{
cemuLog_log(LogType::Force, "Mounting failed");
- return STATUS_CODE::UNABLE_TO_MOUNT;
+ return PREPARE_STATUS_CODE::UNABLE_TO_MOUNT;
}
// check update
TitleInfo& titleUpdate = sGameInfo_ForegroundTitle.GetUpdate();
if (titleUpdate.IsValid())
{
if (!titleUpdate.ParseXmlInfo())
- return STATUS_CODE::UNABLE_TO_MOUNT;
+ return PREPARE_STATUS_CODE::UNABLE_TO_MOUNT;
cemuLog_log(LogType::Force, "Update: {}", titleUpdate.GetPrintPath());
// mount update
if (!titleUpdate.Mount("/vol/content", "content", FSC_PRIORITY_PATCH) || !titleUpdate.Mount(GetInternalVirtualCodeFolder(), "code", FSC_PRIORITY_PATCH))
{
cemuLog_log(LogType::Force, "Mounting failed");
- return STATUS_CODE::UNABLE_TO_MOUNT;
+ return PREPARE_STATUS_CODE::UNABLE_TO_MOUNT;
}
}
else
@@ -682,20 +683,20 @@ namespace CafeSystem
// todo - support for multi-title AOC
TitleInfo& titleAOC = aocList[0];
if (!titleAOC.ParseXmlInfo())
- return STATUS_CODE::UNABLE_TO_MOUNT;
+ return PREPARE_STATUS_CODE::UNABLE_TO_MOUNT;
cemu_assert_debug(titleAOC.IsValid());
cemuLog_log(LogType::Force, "DLC: {}", titleAOC.GetPrintPath());
// mount AOC
if (!titleAOC.Mount(fmt::format("/vol/aoc{:016x}", titleAOC.GetAppTitleId()), "content", FSC_PRIORITY_PATCH))
{
cemuLog_log(LogType::Force, "Mounting failed");
- return STATUS_CODE::UNABLE_TO_MOUNT;
+ return PREPARE_STATUS_CODE::UNABLE_TO_MOUNT;
}
}
else
cemuLog_log(LogType::Force, "DLC: Not present");
sForegroundTitleId = titleId;
- return STATUS_CODE::SUCCESS;
+ return PREPARE_STATUS_CODE::SUCCESS;
}
void UnmountForegroundTitle()
@@ -723,7 +724,7 @@ namespace CafeSystem
}
}
- STATUS_CODE SetupExecutable()
+ PREPARE_STATUS_CODE SetupExecutable()
{
// set rpx path from cos.xml if available
_pathToBaseExecutable = _pathToExecutable;
@@ -755,7 +756,7 @@ namespace CafeSystem
}
}
LoadMainExecutable();
- return STATUS_CODE::SUCCESS;
+ return PREPARE_STATUS_CODE::SUCCESS;
}
void SetupMemorySpace()
@@ -769,7 +770,7 @@ namespace CafeSystem
memory_unmapForCurrentTitle();
}
- STATUS_CODE PrepareForegroundTitle(TitleId titleId)
+ PREPARE_STATUS_CODE PrepareForegroundTitle(TitleId titleId)
{
CafeTitleList::WaitForMandatoryScan();
sLaunchModeIsStandalone = false;
@@ -780,21 +781,21 @@ namespace CafeSystem
// mount mlc storage
MountBaseDirectories();
// mount title folders
- STATUS_CODE r = LoadAndMountForegroundTitle(titleId);
- if (r != STATUS_CODE::SUCCESS)
+ PREPARE_STATUS_CODE r = LoadAndMountForegroundTitle(titleId);
+ if (r != PREPARE_STATUS_CODE::SUCCESS)
return r;
gameProfile_load();
// setup memory space and PPC recompiler
SetupMemorySpace();
PPCRecompiler_init();
r = SetupExecutable(); // load RPX
- if (r != STATUS_CODE::SUCCESS)
+ if (r != PREPARE_STATUS_CODE::SUCCESS)
return r;
InitVirtualMlcStorage();
- return STATUS_CODE::SUCCESS;
+ return PREPARE_STATUS_CODE::SUCCESS;
}
- STATUS_CODE PrepareForegroundTitleFromStandaloneRPX(const fs::path& path)
+ PREPARE_STATUS_CODE PrepareForegroundTitleFromStandaloneRPX(const fs::path& path)
{
sLaunchModeIsStandalone = true;
cemuLog_log(LogType::Force, "Launching executable in standalone mode due to incorrect layout or missing meta files");
@@ -812,7 +813,7 @@ namespace CafeSystem
if (!r)
{
cemuLog_log(LogType::Force, "Failed to mount {}", _pathToUtf8(contentPath));
- return STATUS_CODE::UNABLE_TO_MOUNT;
+ return PREPARE_STATUS_CODE::UNABLE_TO_MOUNT;
}
}
}
@@ -824,7 +825,7 @@ namespace CafeSystem
// since a lot of systems (including save folder location) rely on a TitleId, we derive a placeholder id from the executable hash
auto execData = fsc_extractFile(_pathToExecutable.c_str());
if (!execData)
- return STATUS_CODE::INVALID_RPX;
+ return PREPARE_STATUS_CODE::INVALID_RPX;
uint32 h = generateHashFromRawRPXData(execData->data(), execData->size());
sForegroundTitleId = 0xFFFFFFFF00000000ULL | (uint64)h;
cemuLog_log(LogType::Force, "Generated placeholder TitleId: {:016x}", sForegroundTitleId);
@@ -834,7 +835,7 @@ namespace CafeSystem
// load executable
SetupExecutable();
InitVirtualMlcStorage();
- return STATUS_CODE::SUCCESS;
+ return PREPARE_STATUS_CODE::SUCCESS;
}
void _LaunchTitleThread()
@@ -843,7 +844,7 @@ namespace CafeSystem
module->TitleStart();
cemu_initForGame();
// enter scheduler
- if (ActiveSettings::GetCPUMode() == CPUMode::MulticoreRecompiler)
+ if ((ActiveSettings::GetCPUMode() == CPUMode::MulticoreRecompiler || LaunchSettings::ForceMultiCoreInterpreter()) && !LaunchSettings::ForceInterpreter())
coreinit::OSSchedulerBegin(3);
else
coreinit::OSSchedulerBegin(1);
diff --git a/src/Cafe/CafeSystem.h b/src/Cafe/CafeSystem.h
index c4043a59..e9de8d7d 100644
--- a/src/Cafe/CafeSystem.h
+++ b/src/Cafe/CafeSystem.h
@@ -15,20 +15,19 @@ namespace CafeSystem
virtual void CafeRecreateCanvas() = 0;
};
- enum class STATUS_CODE
+ enum class PREPARE_STATUS_CODE
{
SUCCESS,
INVALID_RPX,
UNABLE_TO_MOUNT, // failed to mount through TitleInfo (most likely caused by an invalid or outdated path)
- //BAD_META_DATA, - the title list only stores titles with valid meta, so this error code is impossible
};
void Initialize();
void SetImplementation(SystemImplementation* impl);
void Shutdown();
- STATUS_CODE PrepareForegroundTitle(TitleId titleId);
- STATUS_CODE PrepareForegroundTitleFromStandaloneRPX(const fs::path& path);
+ PREPARE_STATUS_CODE PrepareForegroundTitle(TitleId titleId);
+ PREPARE_STATUS_CODE PrepareForegroundTitleFromStandaloneRPX(const fs::path& path);
void LaunchForegroundTitle();
bool IsTitleRunning();
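
As a quick illustration of the renamed enum in use, a hedged sketch of a caller; the function and error handling below are hypothetical, only the CafeSystem declarations above come from this diff:

```cpp
// Illustrative only: prepare a title and launch it if mounting and RPX loading succeed.
void LaunchTitleExample(TitleId titleId)
{
    CafeSystem::PREPARE_STATUS_CODE status = CafeSystem::PrepareForegroundTitle(titleId);
    if (status != CafeSystem::PREPARE_STATUS_CODE::SUCCESS)
    {
        // e.g. PREPARE_STATUS_CODE::UNABLE_TO_MOUNT or PREPARE_STATUS_CODE::INVALID_RPX
        return;
    }
    CafeSystem::LaunchForegroundTitle();
}
```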
diff --git a/src/Cafe/Filesystem/FST/FST.cpp b/src/Cafe/Filesystem/FST/FST.cpp
index 570671d4..ec112b9a 100644
--- a/src/Cafe/Filesystem/FST/FST.cpp
+++ b/src/Cafe/Filesystem/FST/FST.cpp
@@ -3,8 +3,7 @@
#include "Cemu/ncrypto/ncrypto.h"
#include "Cafe/Filesystem/WUD/wud.h"
#include "util/crypto/aes128.h"
-#include "openssl/evp.h" /* EVP_Digest */
-#include "openssl/sha.h" /* SHA1 / SHA256_DIGEST_LENGTH */
+#include "openssl/sha.h" /* SHA1 / SHA256 */
#include "fstUtil.h"
#include "FST.h"
@@ -14,6 +13,8 @@
#define SET_FST_ERROR(__code) if (errorCodeOut) *errorCodeOut = ErrorCode::__code
+static_assert(sizeof(NCrypto::AesIv) == 16); // make sure IV is actually 16 bytes
+
class FSTDataSource
{
public:
@@ -141,7 +142,7 @@ struct DiscPartitionTableHeader
static constexpr uint32 MAGIC_VALUE = 0xCCA6E67B;
/* +0x00 */ uint32be magic;
- /* +0x04 */ uint32be sectorSize; // must be 0x8000?
+ /* +0x04 */ uint32be blockSize; // must be 0x8000?
/* +0x08 */ uint8 partitionTableHash[20]; // hash of the data range at +0x800 to end of sector (0x8000)
/* +0x1C */ uint32be numPartitions;
};
@@ -164,10 +165,10 @@ struct DiscPartitionHeader
static constexpr uint32 MAGIC_VALUE = 0xCC93A4F5;
/* +0x00 */ uint32be magic;
- /* +0x04 */ uint32be sectorSize; // must match DISC_SECTOR_SIZE
+ /* +0x04 */ uint32be sectorSize; // must match DISC_SECTOR_SIZE for hashed blocks
/* +0x08 */ uint32be ukn008;
- /* +0x0C */ uint32be ukn00C;
+ /* +0x0C */ uint32be ukn00C; // h3 array size?
/* +0x10 */ uint32be h3HashNum;
/* +0x14 */ uint32be fstSize; // in bytes
/* +0x18 */ uint32be fstSector; // relative to partition start
@@ -178,13 +179,15 @@ struct DiscPartitionHeader
/* +0x24 */ uint8 fstHashType;
/* +0x25 */ uint8 fstEncryptionType; // purpose of this isn't really understood. Maybe it controls which key is being used? (1 -> disc key, 2 -> partition key)
- /* +0x26 */ uint8 versionA;
- /* +0x27 */ uint8 ukn027; // also a version field?
+ /* +0x26 */ uint8be versionA;
+ /* +0x27 */ uint8be ukn027; // also a version field?
// there is an array at +0x40 ? Related to H3 list. Also related to value at +0x0C and h3HashNum
+ /* +0x28 */ uint8be _uknOrPadding028[0x18];
+ /* +0x40 */ uint8be h3HashArray[32]; // dynamic size. Only present if fstHashType != 0
};
-static_assert(sizeof(DiscPartitionHeader) == 0x28);
+static_assert(sizeof(DiscPartitionHeader) == 0x40+0x20);
bool FSTVolume::FindDiscKey(const fs::path& path, NCrypto::AesKey& discTitleKey)
{
@@ -269,7 +272,7 @@ FSTVolume* FSTVolume::OpenFromDiscImage(const fs::path& path, NCrypto::AesKey& d
cemuLog_log(LogType::Force, "Disc image rejected because decryption failed");
return nullptr;
}
- if (partitionHeader->sectorSize != DISC_SECTOR_SIZE)
+ if (partitionHeader->blockSize != DISC_SECTOR_SIZE)
{
cemuLog_log(LogType::Force, "Disc image rejected because partition sector size is invalid");
return nullptr;
@@ -336,6 +339,9 @@ FSTVolume* FSTVolume::OpenFromDiscImage(const fs::path& path, NCrypto::AesKey& d
cemu_assert_debug(partitionHeaderSI.fstEncryptionType == 1);
// todo - check other fields?
+ if(partitionHeaderSI.fstHashType == 0 && partitionHeaderSI.h3HashNum != 0)
+ cemuLog_log(LogType::Force, "FST: Partition uses unhashed blocks but stores a non-zero amount of H3 hashes");
+
// GM partition
DiscPartitionHeader partitionHeaderGM{};
if (!readPartitionHeader(partitionHeaderGM, gmPartitionIndex))
@@ -349,9 +355,10 @@ FSTVolume* FSTVolume::OpenFromDiscImage(const fs::path& path, NCrypto::AesKey& d
// if decryption is necessary
// load SI FST
dataSource->SetBaseOffset((uint64)partitionArray[siPartitionIndex].partitionAddress * DISC_SECTOR_SIZE);
- auto siFST = OpenFST(dataSource.get(), (uint64)partitionHeaderSI.fstSector * DISC_SECTOR_SIZE, partitionHeaderSI.fstSize, &discTitleKey, static_cast<ClusterHashMode>(partitionHeaderSI.fstHashType));
+ auto siFST = OpenFST(dataSource.get(), (uint64)partitionHeaderSI.fstSector * DISC_SECTOR_SIZE, partitionHeaderSI.fstSize, &discTitleKey, static_cast<ClusterHashMode>(partitionHeaderSI.fstHashType), nullptr);
if (!siFST)
return nullptr;
+ cemu_assert_debug(!(siFST->HashIsDisabled() && partitionHeaderSI.h3HashNum != 0)); // if hash is disabled, no H3 data may be present
// load ticket file for partition that we want to decrypt
NCrypto::ETicketParser ticketParser;
std::vector<uint8> ticketData = siFST->ExtractFile(fmt::format("{:02x}/title.tik", gmPartitionIndex));
@@ -360,16 +367,32 @@ FSTVolume* FSTVolume::OpenFromDiscImage(const fs::path& path, NCrypto::AesKey& d
cemuLog_log(LogType::Force, "Disc image ticket file is invalid");
return nullptr;
}
+#if 0
+ // each SI partition seems to contain a title.tmd that we could parse and which should have information about the associated GM partition
+ // but the console seems to ignore this file for disc images, at least when mounting, so we shouldn't rely on it either
+ std::vector<uint8> tmdData = siFST->ExtractFile(fmt::format("{:02x}/title.tmd", gmPartitionIndex));
+ if (tmdData.empty())
+ {
+ cemuLog_log(LogType::Force, "Disc image TMD file is missing");
+ return nullptr;
+ }
+ // parse TMD
+ NCrypto::TMDParser tmdParser;
+ if (!tmdParser.parse(tmdData.data(), tmdData.size()))
+ {
+ cemuLog_log(LogType::Force, "Disc image TMD file is invalid");
+ return nullptr;
+ }
+#endif
delete siFST;
-
NCrypto::AesKey gmTitleKey;
ticketParser.GetTitleKey(gmTitleKey);
-
// load GM partition
dataSource->SetBaseOffset((uint64)partitionArray[gmPartitionIndex].partitionAddress * DISC_SECTOR_SIZE);
- FSTVolume* r = OpenFST(std::move(dataSource), (uint64)partitionHeaderGM.fstSector * DISC_SECTOR_SIZE, partitionHeaderGM.fstSize, &gmTitleKey, static_cast<ClusterHashMode>(partitionHeaderGM.fstHashType));
+ FSTVolume* r = OpenFST(std::move(dataSource), (uint64)partitionHeaderGM.fstSector * DISC_SECTOR_SIZE, partitionHeaderGM.fstSize, &gmTitleKey, static_cast<ClusterHashMode>(partitionHeaderGM.fstHashType), nullptr);
if (r)
SET_FST_ERROR(OK);
+ cemu_assert_debug(!(r->HashIsDisabled() && partitionHeaderGM.h3HashNum != 0)); // if hash is disabled, no H3 data may be present
return r;
}
@@ -426,15 +449,15 @@ FSTVolume* FSTVolume::OpenFromContentFolder(fs::path folderPath, ErrorCode* erro
}
// load FST
// fstSize = size of first cluster?
- FSTVolume* fstVolume = FSTVolume::OpenFST(std::move(dataSource), 0, fstSize, &titleKey, fstHashMode);
+ FSTVolume* fstVolume = FSTVolume::OpenFST(std::move(dataSource), 0, fstSize, &titleKey, fstHashMode, &tmdParser);
if (fstVolume)
SET_FST_ERROR(OK);
return fstVolume;
}
-FSTVolume* FSTVolume::OpenFST(FSTDataSource* dataSource, uint64 fstOffset, uint32 fstSize, NCrypto::AesKey* partitionTitleKey, ClusterHashMode fstHashMode)
+FSTVolume* FSTVolume::OpenFST(FSTDataSource* dataSource, uint64 fstOffset, uint32 fstSize, NCrypto::AesKey* partitionTitleKey, ClusterHashMode fstHashMode, NCrypto::TMDParser* optionalTMD)
{
- cemu_assert_debug(fstHashMode != ClusterHashMode::RAW || fstHashMode != ClusterHashMode::RAW2);
+ cemu_assert_debug(fstHashMode != ClusterHashMode::RAW || fstHashMode != ClusterHashMode::RAW_STREAM);
if (fstSize < sizeof(FSTHeader))
return nullptr;
constexpr uint64 FST_CLUSTER_OFFSET = 0;
@@ -465,6 +488,34 @@ FSTVolume* FSTVolume::OpenFST(FSTDataSource* dataSource, uint64 fstOffset, uint3
clusterTable[i].offset = clusterDataTable[i].offset;
clusterTable[i].size = clusterDataTable[i].size;
clusterTable[i].hashMode = static_cast<ClusterHashMode>((uint8)clusterDataTable[i].hashMode);
+ clusterTable[i].hasContentHash = false; // from the TMD file (H4?)
+ }
+ // if the TMD is available (when opening .app files) we can use the extra info from it to validate unhashed clusters
+ // each content entry in the TMD corresponds to one cluster used by the FST
+ if(optionalTMD)
+ {
+ if(numCluster != optionalTMD->GetContentList().size())
+ {
+ cemuLog_log(LogType::Force, "FST: Number of clusters does not match TMD content list");
+ return nullptr;
+ }
+ auto& contentList = optionalTMD->GetContentList();
+ for(size_t i=0; im_offsetFactor = fstHeader->offsetFactor;
fstVolume->m_sectorSize = DISC_SECTOR_SIZE;
fstVolume->m_partitionTitlekey = *partitionTitleKey;
- std::swap(fstVolume->m_cluster, clusterTable);
- std::swap(fstVolume->m_entries, fstEntries);
- std::swap(fstVolume->m_nameStringTable, nameStringTable);
+ fstVolume->m_hashIsDisabled = fstHeader->hashIsDisabled != 0;
+ fstVolume->m_cluster = std::move(clusterTable);
+ fstVolume->m_entries = std::move(fstEntries);
+ fstVolume->m_nameStringTable = std::move(nameStringTable);
return fstVolume;
}
-FSTVolume* FSTVolume::OpenFST(std::unique_ptr<FSTDataSource> dataSource, uint64 fstOffset, uint32 fstSize, NCrypto::AesKey* partitionTitleKey, ClusterHashMode fstHashMode)
+FSTVolume* FSTVolume::OpenFST(std::unique_ptr<FSTDataSource> dataSource, uint64 fstOffset, uint32 fstSize, NCrypto::AesKey* partitionTitleKey, ClusterHashMode fstHashMode, NCrypto::TMDParser* optionalTMD)
{
FSTDataSource* ds = dataSource.release();
- FSTVolume* fstVolume = OpenFST(ds, fstOffset, fstSize, partitionTitleKey, fstHashMode);
+ FSTVolume* fstVolume = OpenFST(ds, fstOffset, fstSize, partitionTitleKey, fstHashMode, optionalTMD);
if (!fstVolume)
{
delete ds;
@@ -757,7 +809,7 @@ uint32 FSTVolume::ReadFile(FSTFileHandle& fileHandle, uint32 offset, uint32 size
return 0;
cemu_assert_debug(!HAS_FLAG(entry.GetFlags(), FSTEntry::FLAGS::FLAG_LINK));
FSTCluster& cluster = m_cluster[entry.fileInfo.clusterIndex];
- if (cluster.hashMode == ClusterHashMode::RAW || cluster.hashMode == ClusterHashMode::RAW2)
+ if (cluster.hashMode == ClusterHashMode::RAW || cluster.hashMode == ClusterHashMode::RAW_STREAM)
return ReadFile_HashModeRaw(entry.fileInfo.clusterIndex, entry, offset, size, dataOut);
else if (cluster.hashMode == ClusterHashMode::HASH_INTERLEAVED)
return ReadFile_HashModeHashed(entry.fileInfo.clusterIndex, entry, offset, size, dataOut);
@@ -765,87 +817,15 @@ uint32 FSTVolume::ReadFile(FSTFileHandle& fileHandle, uint32 offset, uint32 size
return 0;
}
-uint32 FSTVolume::ReadFile_HashModeRaw(uint32 clusterIndex, FSTEntry& entry, uint32 readOffset, uint32 readSize, void* dataOut)
-{
- const uint32 readSizeInput = readSize;
- uint8* dataOutU8 = (uint8*)dataOut;
- if (readOffset >= entry.fileInfo.fileSize)
- return 0;
- else if ((readOffset + readSize) >= entry.fileInfo.fileSize)
- readSize = (entry.fileInfo.fileSize - readOffset);
-
- const FSTCluster& cluster = m_cluster[clusterIndex];
- uint64 clusterOffset = (uint64)cluster.offset * m_sectorSize;
- uint64 absFileOffset = entry.fileInfo.fileOffset * m_offsetFactor + readOffset;
-
- // make sure the raw range we read is aligned to AES block size (16)
- uint64 readAddrStart = absFileOffset & ~0xF;
- uint64 readAddrEnd = (absFileOffset + readSize + 0xF) & ~0xF;
-
- bool usesInitialIV = readOffset < 16;
- if (!usesInitialIV)
- readAddrStart -= 16; // read previous AES block since we require it for the IV
- uint32 prePadding = (uint32)(absFileOffset - readAddrStart); // number of extra bytes we read before readOffset (for AES alignment and IV calculation)
- uint32 postPadding = (uint32)(readAddrEnd - (absFileOffset + readSize));
-
- uint8 readBuffer[64 * 1024];
- // read first chunk
- // if file read offset (readOffset) is within the first AES-block then use initial IV calculated from cluster index
- // otherwise read previous AES-block is the IV (AES-CBC)
- uint64 readAddrCurrent = readAddrStart;
- uint32 rawBytesToRead = (uint32)std::min((readAddrEnd - readAddrStart), (uint64)sizeof(readBuffer));
- if (m_dataSource->readData(clusterIndex, clusterOffset, readAddrCurrent, readBuffer, rawBytesToRead) != rawBytesToRead)
- {
- cemuLog_log(LogType::Force, "FST read error in raw content");
- return 0;
- }
- readAddrCurrent += rawBytesToRead;
-
- uint8 iv[16]{};
- if (usesInitialIV)
- {
- // for the first AES block, the IV is initialized from cluster index
- iv[0] = (uint8)(clusterIndex >> 8);
- iv[1] = (uint8)(clusterIndex >> 0);
- AES128_CBC_decrypt_updateIV(readBuffer, readBuffer, rawBytesToRead, m_partitionTitlekey.b, iv);
- std::memcpy(dataOutU8, readBuffer + prePadding, rawBytesToRead - prePadding - postPadding);
- dataOutU8 += (rawBytesToRead - prePadding - postPadding);
- readSize -= (rawBytesToRead - prePadding - postPadding);
- }
- else
- {
- // IV is initialized from previous AES block (AES-CBC)
- std::memcpy(iv, readBuffer, 16);
- AES128_CBC_decrypt_updateIV(readBuffer + 16, readBuffer + 16, rawBytesToRead - 16, m_partitionTitlekey.b, iv);
- std::memcpy(dataOutU8, readBuffer + prePadding, rawBytesToRead - prePadding - postPadding);
- dataOutU8 += (rawBytesToRead - prePadding - postPadding);
- readSize -= (rawBytesToRead - prePadding - postPadding);
- }
-
- // read remaining chunks
- while (readSize > 0)
- {
- uint32 bytesToRead = (uint32)std::min((uint32)sizeof(readBuffer), readSize);
- uint32 alignedBytesToRead = (bytesToRead + 15) & ~0xF;
- if (m_dataSource->readData(clusterIndex, clusterOffset, readAddrCurrent, readBuffer, alignedBytesToRead) != alignedBytesToRead)
- {
- cemuLog_log(LogType::Force, "FST read error in raw content");
- return 0;
- }
- AES128_CBC_decrypt_updateIV(readBuffer, readBuffer, alignedBytesToRead, m_partitionTitlekey.b, iv);
- std::memcpy(dataOutU8, readBuffer, bytesToRead);
- dataOutU8 += bytesToRead;
- readSize -= bytesToRead;
- readAddrCurrent += alignedBytesToRead;
- }
-
- return readSizeInput - readSize;
-}
-
constexpr size_t BLOCK_SIZE = 0x10000;
constexpr size_t BLOCK_HASH_SIZE = 0x0400;
constexpr size_t BLOCK_FILE_SIZE = 0xFC00;
+struct FSTRawBlock
+{
+ std::vector<uint8> rawData; // unhashed block size depends on sector size field in partition header
+};
+
struct FSTHashedBlock
{
uint8 rawData[BLOCK_SIZE];
@@ -887,12 +867,160 @@ struct FSTHashedBlock
static_assert(sizeof(FSTHashedBlock) == BLOCK_SIZE);
+struct FSTCachedRawBlock
+{
+ FSTRawBlock blockData;
+ NCrypto::AesIv ivForNextBlock;
+ uint64 lastAccess;
+};
+
struct FSTCachedHashedBlock
{
FSTHashedBlock blockData;
uint64 lastAccess;
};
+// Checks the cache fill state and, if necessary, drops the least recently accessed block from the cache. Optionally lets the caller recycle the released cache entry to cut down on the cost of memory allocation and clearing
+void FSTVolume::TrimCacheIfRequired(FSTCachedRawBlock** droppedRawBlock, FSTCachedHashedBlock** droppedHashedBlock)
+{
+ // calculate size used by cache
+ size_t cacheSize = 0;
+ for (auto& itr : m_cacheDecryptedRawBlocks)
+ cacheSize += itr.second->blockData.rawData.size();
+ for (auto& itr : m_cacheDecryptedHashedBlocks)
+ cacheSize += sizeof(FSTCachedHashedBlock) + sizeof(FSTHashedBlock);
+ // only trim if cache is full (larger than 2MB)
+ if (cacheSize < 2*1024*1024) // 2MB
+ return;
+ // scan both cache lists to find least recently accessed block to drop
+ auto dropRawItr = std::min_element(m_cacheDecryptedRawBlocks.begin(), m_cacheDecryptedRawBlocks.end(), [](const auto& a, const auto& b) -> bool
+ { return a.second->lastAccess < b.second->lastAccess; });
+ auto dropHashedItr = std::min_element(m_cacheDecryptedHashedBlocks.begin(), m_cacheDecryptedHashedBlocks.end(), [](const auto& a, const auto& b) -> bool
+ { return a.second->lastAccess < b.second->lastAccess; });
+ uint64 lastAccess = std::numeric_limits<uint64>::max();
+ if(dropRawItr != m_cacheDecryptedRawBlocks.end())
+ lastAccess = dropRawItr->second->lastAccess;
+ if(dropHashedItr != m_cacheDecryptedHashedBlocks.end())
+ lastAccess = std::min(lastAccess, dropHashedItr->second->lastAccess);
+ if(dropRawItr != m_cacheDecryptedRawBlocks.end() && dropRawItr->second->lastAccess == lastAccess)
+ {
+ if (droppedRawBlock)
+ *droppedRawBlock = dropRawItr->second;
+ else
+ delete dropRawItr->second;
+ m_cacheDecryptedRawBlocks.erase(dropRawItr);
+ return;
+ }
+ else if(dropHashedItr != m_cacheDecryptedHashedBlocks.end() && dropHashedItr->second->lastAccess == lastAccess)
+ {
+ if (droppedHashedBlock)
+ *droppedHashedBlock = dropHashedItr->second;
+ else
+ delete dropHashedItr->second;
+ m_cacheDecryptedHashedBlocks.erase(dropHashedItr);
+ }
+}
+
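For reference, a minimal standalone sketch of the LRU selection used by TrimCacheIfRequired above. CachedBlock and EvictLeastRecentlyUsed are illustrative names only, not part of this change:

#include <cstdint>
#include <unordered_map>
#include <algorithm>

// Illustrative stand-in for FSTCachedRawBlock / FSTCachedHashedBlock.
struct CachedBlock { uint64_t lastAccess; };

// Drop the entry with the smallest lastAccess counter, as TrimCacheIfRequired does
// for each of the two block caches once the combined size passes the limit.
void EvictLeastRecentlyUsed(std::unordered_map<uint64_t, CachedBlock*>& cache)
{
    if (cache.empty())
        return;
    auto victim = std::min_element(cache.begin(), cache.end(),
        [](const auto& a, const auto& b) { return a.second->lastAccess < b.second->lastAccess; });
    delete victim->second;
    cache.erase(victim);
}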
+void FSTVolume::DetermineUnhashedBlockIV(uint32 clusterIndex, uint32 blockIndex, NCrypto::AesIv& ivOut)
+{
+ ivOut = {};
+ if(blockIndex == 0)
+ {
+ ivOut.iv[0] = (uint8)(clusterIndex >> 8);
+ ivOut.iv[1] = (uint8)(clusterIndex >> 0);
+ }
+ else
+ {
+ // the last 16 encrypted bytes of the previous block are the IV (AES CBC)
+ // if the previous block is cached we can grab the IV from there. Otherwise we have to read the 16 bytes from the data source
+ uint32 prevBlockIndex = blockIndex - 1;
+ uint64 cacheBlockId = ((uint64)clusterIndex << (64 - 16)) | (uint64)prevBlockIndex;
+ auto itr = m_cacheDecryptedRawBlocks.find(cacheBlockId);
+ if (itr != m_cacheDecryptedRawBlocks.end())
+ {
+ ivOut = itr->second->ivForNextBlock;
+ }
+ else
+ {
+ cemu_assert(m_sectorSize >= NCrypto::AesIv::SIZE);
+ uint64 clusterOffset = (uint64)m_cluster[clusterIndex].offset * m_sectorSize;
+ NCrypto::AesIv prevIV{};
+ if (m_dataSource->readData(clusterIndex, clusterOffset, blockIndex * m_sectorSize - NCrypto::AesIv::SIZE, prevIV.iv, NCrypto::AesIv::SIZE) != NCrypto::AesIv::SIZE)
+ {
+ cemuLog_log(LogType::Force, "Failed to read IV for raw FST block");
+ m_detectedCorruption = true;
+ return;
+ }
+ ivOut = prevIV;
+ }
+ }
+}
+
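A minimal sketch of the IV scheme described above, assuming only what the function shows: block 0 derives its IV from the cluster index, later blocks chain off the previous block's last 16 ciphertext bytes. MakeFirstBlockIV is an illustrative helper, not part of the change:

#include <cstdint>
#include <array>

// Block 0 of a cluster uses an IV built from the cluster index (big-endian in the
// first two bytes, remaining bytes zero); every later block uses the last 16
// ciphertext bytes of the preceding block as its IV (plain AES-CBC chaining).
std::array<uint8_t, 16> MakeFirstBlockIV(uint32_t clusterIndex)
{
    std::array<uint8_t, 16> iv{}; // zero-initialized
    iv[0] = static_cast<uint8_t>(clusterIndex >> 8);
    iv[1] = static_cast<uint8_t>(clusterIndex >> 0);
    return iv;
}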
+FSTCachedRawBlock* FSTVolume::GetDecryptedRawBlock(uint32 clusterIndex, uint32 blockIndex)
+{
+ FSTCluster& cluster = m_cluster[clusterIndex];
+ uint64 clusterOffset = (uint64)cluster.offset * m_sectorSize;
+ // generate id for cache
+ uint64 cacheBlockId = ((uint64)clusterIndex << (64 - 16)) | (uint64)blockIndex;
+ // lookup block in cache
+ FSTCachedRawBlock* block = nullptr;
+ auto itr = m_cacheDecryptedRawBlocks.find(cacheBlockId);
+ if (itr != m_cacheDecryptedRawBlocks.end())
+ {
+ block = itr->second;
+ block->lastAccess = ++m_cacheAccessCounter;
+ return block;
+ }
+ // if cache already full, drop least recently accessed block and recycle FSTCachedRawBlock object if possible
+ TrimCacheIfRequired(&block, nullptr);
+ if (!block)
+ block = new FSTCachedRawBlock();
+ block->blockData.rawData.resize(m_sectorSize);
+ // block not cached, read new
+ block->lastAccess = ++m_cacheAccessCounter;
+ if (m_dataSource->readData(clusterIndex, clusterOffset, blockIndex * m_sectorSize, block->blockData.rawData.data(), m_sectorSize) != m_sectorSize)
+ {
+ cemuLog_log(LogType::Force, "Failed to read raw FST block");
+ delete block;
+ m_detectedCorruption = true;
+ return nullptr;
+ }
+ // decrypt raw block data
+ NCrypto::AesIv iv{};
+ DetermineUnhashedBlockIV(clusterIndex, blockIndex, iv);
+ std::copy(block->blockData.rawData.data() + m_sectorSize - NCrypto::AesIv::SIZE, block->blockData.rawData.data() + m_sectorSize, block->ivForNextBlock.iv);
+ AES128_CBC_decrypt(block->blockData.rawData.data(), block->blockData.rawData.data(), m_sectorSize, m_partitionTitlekey.b, iv.iv);
+ // if this is the next block, then hash it
+ if(cluster.hasContentHash)
+ {
+ if(cluster.singleHashNumBlocksHashed == blockIndex)
+ {
+ cemu_assert_debug(!(cluster.contentSize % m_sectorSize)); // size should be multiple of sector size? Regardless, the hashing code below can handle non-aligned sizes
+ bool isLastBlock = blockIndex == (std::max<uint32>(cluster.contentSize / m_sectorSize, 1) - 1);
+ uint32 hashSize = m_sectorSize;
+ if(isLastBlock)
+ hashSize = cluster.contentSize - (uint64)blockIndex*m_sectorSize;
+ EVP_DigestUpdate(cluster.singleHashCtx.get(), block->blockData.rawData.data(), hashSize);
+ cluster.singleHashNumBlocksHashed++;
+ if(isLastBlock)
+ {
+ uint8 hash[32];
+ EVP_DigestFinal_ex(cluster.singleHashCtx.get(), hash, nullptr);
+ if(memcmp(hash, cluster.contentHash32, cluster.contentHashIsSHA1 ? 20 : 32) != 0)
+ {
+ cemuLog_log(LogType::Force, "FST: Raw section hash mismatch");
+ delete block;
+ m_detectedCorruption = true;
+ return nullptr;
+ }
+ }
+ }
+ }
+ // register in cache
+ m_cacheDecryptedRawBlocks.emplace(cacheBlockId, block);
+ return block;
+}
+
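For clarity, the key packing used by both block caches (see cacheBlockId above) in a standalone form; MakeBlockCacheId is an illustrative name, not part of the change:

#include <cstdint>

// The raw and hashed block caches share the same key scheme: the cluster index in
// the upper 16 bits, the block index in the lower 48 bits.
constexpr uint64_t MakeBlockCacheId(uint32_t clusterIndex, uint32_t blockIndex)
{
    return (static_cast<uint64_t>(clusterIndex) << 48) | static_cast<uint64_t>(blockIndex);
}
static_assert(MakeBlockCacheId(1, 2) == ((1ull << 48) | 2ull), "cluster 1, block 2");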
FSTCachedHashedBlock* FSTVolume::GetDecryptedHashedBlock(uint32 clusterIndex, uint32 blockIndex)
{
const FSTCluster& cluster = m_cluster[clusterIndex];
@@ -908,22 +1036,17 @@ FSTCachedHashedBlock* FSTVolume::GetDecryptedHashedBlock(uint32 clusterIndex, ui
block->lastAccess = ++m_cacheAccessCounter;
return block;
}
- // if cache already full, drop least recently accessed block (but recycle the FSTHashedBlock* object)
- if (m_cacheDecryptedHashedBlocks.size() >= 16)
- {
- auto dropItr = std::min_element(m_cacheDecryptedHashedBlocks.begin(), m_cacheDecryptedHashedBlocks.end(), [](const auto& a, const auto& b) -> bool
- { return a.second->lastAccess < b.second->lastAccess; });
- block = dropItr->second;
- m_cacheDecryptedHashedBlocks.erase(dropItr);
- }
- else
+ // if cache already full, drop least recently accessed block and recycle FSTCachedHashedBlock object if possible
+ TrimCacheIfRequired(nullptr, &block);
+ if (!block)
block = new FSTCachedHashedBlock();
// block not cached, read new
block->lastAccess = ++m_cacheAccessCounter;
if (m_dataSource->readData(clusterIndex, clusterOffset, blockIndex * BLOCK_SIZE, block->blockData.rawData, BLOCK_SIZE) != BLOCK_SIZE)
{
- cemuLog_log(LogType::Force, "Failed to read FST block");
+ cemuLog_log(LogType::Force, "Failed to read hashed FST block");
delete block;
+ m_detectedCorruption = true;
return nullptr;
}
// decrypt hash data
@@ -931,11 +1054,46 @@ FSTCachedHashedBlock* FSTVolume::GetDecryptedHashedBlock(uint32 clusterIndex, ui
AES128_CBC_decrypt(block->blockData.getHashData(), block->blockData.getHashData(), BLOCK_HASH_SIZE, m_partitionTitlekey.b, iv);
// decrypt file data
AES128_CBC_decrypt(block->blockData.getFileData(), block->blockData.getFileData(), BLOCK_FILE_SIZE, m_partitionTitlekey.b, block->blockData.getH0Hash(blockIndex%16));
+ // compare with H0 to verify data integrity
+ NCrypto::CHash160 h0;
+ SHA1(block->blockData.getFileData(), BLOCK_FILE_SIZE, h0.b);
+ uint32 h0Index = (blockIndex % 4096);
+ if (memcmp(h0.b, block->blockData.getH0Hash(h0Index & 0xF), sizeof(h0.b)) != 0)
+ {
+ cemuLog_log(LogType::Force, "FST: Hash H0 mismatch in hashed block (section {} index {})", clusterIndex, blockIndex);
+ delete block;
+ m_detectedCorruption = true;
+ return nullptr;
+ }
// register in cache
m_cacheDecryptedHashedBlocks.emplace(cacheBlockId, block);
return block;
}
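A reduced sketch of the H0 integrity check added above, assuming OpenSSL's SHA1 as used in the hunk; VerifyH0 is an illustrative helper, not part of the change:

#include <cstdint>
#include <cstring>
#include <openssl/sha.h>

// Hash the 0xFC00-byte data portion of a hashed block and compare it against the
// matching 20-byte H0 entry from the 0x400-byte hash header (entry = blockIndex % 16).
bool VerifyH0(const uint8_t* fileData /* 0xFC00 bytes */, const uint8_t* h0Entry /* 20 bytes */)
{
    uint8_t digest[SHA_DIGEST_LENGTH];
    SHA1(fileData, 0xFC00, digest);
    return std::memcmp(digest, h0Entry, SHA_DIGEST_LENGTH) == 0;
}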
+uint32 FSTVolume::ReadFile_HashModeRaw(uint32 clusterIndex, FSTEntry& entry, uint32 readOffset, uint32 readSize, void* dataOut)
+{
+ uint8* dataOutU8 = (uint8*)dataOut;
+ if (readOffset >= entry.fileInfo.fileSize)
+ return 0;
+ else if ((readOffset + readSize) >= entry.fileInfo.fileSize)
+ readSize = (entry.fileInfo.fileSize - readOffset);
+ uint64 absFileOffset = entry.fileInfo.fileOffset * m_offsetFactor + readOffset;
+ uint32 remainingReadSize = readSize;
+ while (remainingReadSize > 0)
+ {
+ const FSTCachedRawBlock* rawBlock = this->GetDecryptedRawBlock(clusterIndex, absFileOffset/m_sectorSize);
+ if (!rawBlock)
+ break;
+ uint32 blockOffset = (uint32)(absFileOffset % m_sectorSize);
+ uint32 bytesToRead = std::min(remainingReadSize, m_sectorSize - blockOffset);
+ std::memcpy(dataOutU8, rawBlock->blockData.rawData.data() + blockOffset, bytesToRead);
+ dataOutU8 += bytesToRead;
+ remainingReadSize -= bytesToRead;
+ absFileOffset += bytesToRead;
+ }
+ return readSize - remainingReadSize;
+}
+
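A generic sketch of the block-wise copy loop above, under the assumption that a block getter returns one whole decrypted block or nullptr on error; ReadAcrossBlocks and getBlock are illustrative names only:

#include <cstdint>
#include <cstring>
#include <algorithm>
#include <functional>
#include <vector>

// A read starting at an arbitrary absolute offset is split into chunks of at most
// (blockSize - offsetInBlock) bytes; getBlock stands in for GetDecryptedRawBlock.
uint32_t ReadAcrossBlocks(const std::function<const std::vector<uint8_t>*(uint32_t blockIndex)>& getBlock,
                          uint32_t blockSize, uint64_t absOffset, uint32_t size, uint8_t* out)
{
    uint32_t remaining = size;
    while (remaining > 0)
    {
        const std::vector<uint8_t>* block = getBlock(static_cast<uint32_t>(absOffset / blockSize));
        if (!block)
            break; // read or verification error -> return a short count
        uint32_t blockOffset = static_cast<uint32_t>(absOffset % blockSize);
        uint32_t chunk = std::min(remaining, blockSize - blockOffset);
        std::memcpy(out, block->data() + blockOffset, chunk);
        out += chunk;
        remaining -= chunk;
        absOffset += chunk;
    }
    return size - remaining;
}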
uint32 FSTVolume::ReadFile_HashModeHashed(uint32 clusterIndex, FSTEntry& entry, uint32 readOffset, uint32 readSize, void* dataOut)
{
/*
@@ -966,7 +1124,6 @@ uint32 FSTVolume::ReadFile_HashModeHashed(uint32 clusterIndex, FSTEntry& entry,
*/
const FSTCluster& cluster = m_cluster[clusterIndex];
- uint64 clusterBaseOffset = (uint64)cluster.offset * m_sectorSize;
uint64 fileReadOffset = entry.fileInfo.fileOffset * m_offsetFactor + readOffset;
uint32 blockIndex = (uint32)(fileReadOffset / BLOCK_FILE_SIZE);
uint32 bytesRemaining = readSize;
@@ -1019,6 +1176,8 @@ bool FSTVolume::Next(FSTDirectoryIterator& directoryIterator, FSTFileHandle& fil
FSTVolume::~FSTVolume()
{
+ for (auto& itr : m_cacheDecryptedRawBlocks)
+ delete itr.second;
for (auto& itr : m_cacheDecryptedHashedBlocks)
delete itr.second;
if (m_sourceIsOwned)
@@ -1115,4 +1274,4 @@ bool FSTVerifier::VerifyHashedContentFile(FileStream* fileContent, const NCrypto
void FSTVolumeTest()
{
FSTPathUnitTest();
-}
\ No newline at end of file
+}
diff --git a/src/Cafe/Filesystem/FST/FST.h b/src/Cafe/Filesystem/FST/FST.h
index 24fc39ea..26201c32 100644
--- a/src/Cafe/Filesystem/FST/FST.h
+++ b/src/Cafe/Filesystem/FST/FST.h
@@ -1,5 +1,6 @@
#pragma once
#include "Cemu/ncrypto/ncrypto.h"
+#include "openssl/evp.h"
struct FSTFileHandle
{
@@ -45,6 +46,7 @@ public:
~FSTVolume();
uint32 GetFileCount() const;
+ bool HasCorruption() const { return m_detectedCorruption; }
bool OpenFile(std::string_view path, FSTFileHandle& fileHandleOut, bool openOnlyFiles = false);
@@ -81,20 +83,29 @@ public:
}
private:
-
/* FST data (in memory) */
enum class ClusterHashMode : uint8
{
RAW = 0, // raw data + encryption, no hashing?
- RAW2 = 1, // raw data + encryption, with hash stored in tmd?
+ RAW_STREAM = 1, // raw data + encryption, with hash stored in tmd?
HASH_INTERLEAVED = 2, // hashes + raw interleaved in 0x10000 blocks (0x400 bytes of hashes at the beginning, followed by 0xFC00 bytes of data)
};
struct FSTCluster
{
+ FSTCluster() : singleHashCtx(nullptr, &EVP_MD_CTX_free) {}
+
uint32 offset;
uint32 size;
ClusterHashMode hashMode;
+ // extra data if TMD is available
+ bool hasContentHash;
+ uint8 contentHash32[32];
+ bool contentHashIsSHA1; // if true then it's SHA1 (with extra bytes zeroed out), otherwise it's SHA256
+ uint64 contentSize; // size of the content in bytes
+ // hash context for single hash mode (content hash must be available)
+ std::unique_ptr<EVP_MD_CTX, decltype(&EVP_MD_CTX_free)> singleHashCtx; // unique_ptr to make this move-only
+ uint32 singleHashNumBlocksHashed{0};
};
struct FSTEntry
@@ -164,17 +175,30 @@ private:
bool m_sourceIsOwned{};
uint32 m_sectorSize{}; // for cluster offsets
uint32 m_offsetFactor{}; // for file offsets
+ bool m_hashIsDisabled{}; // disables hash verification (for all clusters of this volume?)
std::vector<FSTCluster> m_cluster;
std::vector<FSTEntry> m_entries;
std::vector<char> m_nameStringTable;
NCrypto::AesKey m_partitionTitlekey;
+ bool m_detectedCorruption{false};
- /* Cache for decrypted hashed blocks */
+ bool HashIsDisabled() const
+ {
+ return m_hashIsDisabled;
+ }
+
+ /* Cache for decrypted raw and hashed blocks */
+ std::unordered_map<uint64, struct FSTCachedRawBlock*> m_cacheDecryptedRawBlocks;
std::unordered_map<uint64, struct FSTCachedHashedBlock*> m_cacheDecryptedHashedBlocks;
uint64 m_cacheAccessCounter{};
+ void DetermineUnhashedBlockIV(uint32 clusterIndex, uint32 blockIndex, NCrypto::AesIv& ivOut);
+
+ struct FSTCachedRawBlock* GetDecryptedRawBlock(uint32 clusterIndex, uint32 blockIndex);
struct FSTCachedHashedBlock* GetDecryptedHashedBlock(uint32 clusterIndex, uint32 blockIndex);
+ void TrimCacheIfRequired(struct FSTCachedRawBlock** droppedRawBlock, struct FSTCachedHashedBlock** droppedHashedBlock);
+
/* File reading */
uint32 ReadFile_HashModeRaw(uint32 clusterIndex, FSTEntry& entry, uint32 readOffset, uint32 readSize, void* dataOut);
uint32 ReadFile_HashModeHashed(uint32 clusterIndex, FSTEntry& entry, uint32 readOffset, uint32 readSize, void* dataOut);
@@ -185,7 +209,10 @@ private:
/* +0x00 */ uint32be magic;
/* +0x04 */ uint32be offsetFactor;
/* +0x08 */ uint32be numCluster;
- /* +0x0C */ uint32be ukn0C;
+ /* +0x0C */ uint8be hashIsDisabled;
+ /* +0x0D */ uint8be ukn0D;
+ /* +0x0E */ uint8be ukn0E;
+ /* +0x0F */ uint8be ukn0F;
/* +0x10 */ uint32be ukn10;
/* +0x14 */ uint32be ukn14;
/* +0x18 */ uint32be ukn18;
@@ -262,8 +289,8 @@ private:
static_assert(sizeof(FSTHeader_FileEntry) == 0x10);
- static FSTVolume* OpenFST(FSTDataSource* dataSource, uint64 fstOffset, uint32 fstSize, NCrypto::AesKey* partitionTitleKey, ClusterHashMode fstHashMode);
- static FSTVolume* OpenFST(std::unique_ptr dataSource, uint64 fstOffset, uint32 fstSize, NCrypto::AesKey* partitionTitleKey, ClusterHashMode fstHashMode);
+ static FSTVolume* OpenFST(FSTDataSource* dataSource, uint64 fstOffset, uint32 fstSize, NCrypto::AesKey* partitionTitleKey, ClusterHashMode fstHashMode, NCrypto::TMDParser* optionalTMD);
+ static FSTVolume* OpenFST(std::unique_ptr dataSource, uint64 fstOffset, uint32 fstSize, NCrypto::AesKey* partitionTitleKey, ClusterHashMode fstHashMode, NCrypto::TMDParser* optionalTMD);
static bool ProcessFST(FSTHeader_FileEntry* fileTable, uint32 numFileEntries, uint32 numCluster, std::vector<char>& nameStringTable, std::vector<FSTEntry>& fstEntries);
bool MatchFSTEntryName(FSTEntry& entry, std::string_view comparedName)
diff --git a/src/Cafe/GameProfile/GameProfile.cpp b/src/Cafe/GameProfile/GameProfile.cpp
index ee92107a..ea303226 100644
--- a/src/Cafe/GameProfile/GameProfile.cpp
+++ b/src/Cafe/GameProfile/GameProfile.cpp
@@ -140,7 +140,7 @@ bool gameProfile_loadEnumOption(IniParser& iniParser, const char* optionName, T&
for(const T& v : T())
{
// test integer option
- if (boost::iequals(fmt::format("{}", static_cast<typename std::underlying_type<T>::type>(v)), *option_value))
+ if (boost::iequals(fmt::format("{}", fmt::underlying(v)), *option_value))
{
option = v;
return true;
diff --git a/src/Cafe/GraphicPack/GraphicPack2.cpp b/src/Cafe/GraphicPack/GraphicPack2.cpp
index c54c31cb..6ae05c5b 100644
--- a/src/Cafe/GraphicPack/GraphicPack2.cpp
+++ b/src/Cafe/GraphicPack/GraphicPack2.cpp
@@ -345,7 +345,7 @@ GraphicPack2::GraphicPack2(fs::path rulesPath, IniParser& rules)
const auto preset_name = rules.FindOption("name");
if (!preset_name)
{
- cemuLog_log(LogType::Force, "Graphic pack \"{}\": Preset in line {} skipped because it has no name option defined", m_name, rules.GetCurrentSectionLineNumber());
+ cemuLog_log(LogType::Force, "Graphic pack \"{}\": Preset in line {} skipped because it has no name option defined", GetNormalizedPathString(), rules.GetCurrentSectionLineNumber());
continue;
}
@@ -369,7 +369,7 @@ GraphicPack2::GraphicPack2(fs::path rulesPath, IniParser& rules)
}
catch (const std::exception & ex)
{
- cemuLog_log(LogType::Force, "Graphic pack \"{}\": Can't parse preset \"{}\": {}", m_name, *preset_name, ex.what());
+ cemuLog_log(LogType::Force, "Graphic pack \"{}\": Can't parse preset \"{}\": {}", GetNormalizedPathString(), *preset_name, ex.what());
}
}
else if (boost::iequals(currentSectionName, "RAM"))
@@ -383,7 +383,7 @@ GraphicPack2::GraphicPack2(fs::path rulesPath, IniParser& rules)
{
if (m_version <= 5)
{
- cemuLog_log(LogType::Force, "Graphic pack \"{}\": [RAM] options are only available for graphic pack version 6 or higher", m_name, optionNameBuf);
+ cemuLog_log(LogType::Force, "Graphic pack \"{}\": [RAM] options are only available for graphic pack version 6 or higher", GetNormalizedPathString(), optionNameBuf);
throw std::exception();
}
@@ -393,12 +393,12 @@ GraphicPack2::GraphicPack2(fs::path rulesPath, IniParser& rules)
{
if (addrEnd <= addrStart)
{
- cemuLog_log(LogType::Force, "Graphic pack \"{}\": start address (0x{:08x}) must be greater than end address (0x{:08x}) for {}", m_name, addrStart, addrEnd, optionNameBuf);
+ cemuLog_log(LogType::Force, "Graphic pack \"{}\": start address (0x{:08x}) must be greater than end address (0x{:08x}) for {}", GetNormalizedPathString(), addrStart, addrEnd, optionNameBuf);
throw std::exception();
}
else if ((addrStart & 0xFFF) != 0 || (addrEnd & 0xFFF) != 0)
{
- cemuLog_log(LogType::Force, "Graphic pack \"{}\": addresses for %s are not aligned to 0x1000", m_name, optionNameBuf);
+ cemuLog_log(LogType::Force, "Graphic pack \"{}\": addresses for %s are not aligned to 0x1000", GetNormalizedPathString(), optionNameBuf);
throw std::exception();
}
else
@@ -408,7 +408,7 @@ GraphicPack2::GraphicPack2(fs::path rulesPath, IniParser& rules)
}
else
{
- cemuLog_log(LogType::Force, "Graphic pack \"{}\": has invalid syntax for option {}", m_name, optionNameBuf);
+ cemuLog_log(LogType::Force, "Graphic pack \"{}\": has invalid syntax for option {}", GetNormalizedPathString(), optionNameBuf);
throw std::exception();
}
}
@@ -422,24 +422,32 @@ GraphicPack2::GraphicPack2(fs::path rulesPath, IniParser& rules)
std::unordered_map> tmp_map;
// all vars must be defined in the default preset vars before
- for (const auto& entry : m_presets)
+ std::vector<std::pair<std::string, std::string>> mismatchingPresetVars;
+ for (const auto& presetEntry : m_presets)
{
- tmp_map[entry->category].emplace_back(entry);
+ tmp_map[presetEntry->category].emplace_back(presetEntry);
- for (auto& kv : entry->variables)
+ for (auto& presetVar : presetEntry->variables)
{
- const auto it = m_preset_vars.find(kv.first);
+ const auto it = m_preset_vars.find(presetVar.first);
if (it == m_preset_vars.cend())
{
- cemuLog_log(LogType::Force, "Graphic pack: \"{}\" contains preset variables which are not defined in the default section", m_name);
- throw std::exception();
+ mismatchingPresetVars.emplace_back(presetEntry->name, presetVar.first);
+ continue;
}
-
// overwrite var type with default var type
- kv.second.first = it->second.first;
+ presetVar.second.first = it->second.first;
}
}
+ if(!mismatchingPresetVars.empty())
+ {
+ cemuLog_log(LogType::Force, "Graphic pack \"{}\" contains preset variables which are not defined in the [Default] section:", GetNormalizedPathString());
+ for (const auto& [presetName, varName] : mismatchingPresetVars)
+ cemuLog_log(LogType::Force, "Preset: {} Variable: {}", presetName, varName);
+ throw std::exception();
+ }
+
// have first entry be default active for every category if no default= is set
for(auto entry : get_values(tmp_map))
{
@@ -469,7 +477,7 @@ GraphicPack2::GraphicPack2(fs::path rulesPath, IniParser& rules)
auto& p2 = kv.second[i + 1];
if (p1->variables.size() != p2->variables.size())
{
- cemuLog_log(LogType::Force, "Graphic pack: \"{}\" contains inconsistent preset variables", m_name);
+ cemuLog_log(LogType::Force, "Graphic pack: \"{}\" contains inconsistent preset variables", GetNormalizedPathString());
throw std::exception();
}
@@ -477,14 +485,14 @@ GraphicPack2::GraphicPack2(fs::path rulesPath, IniParser& rules)
std::set<std::string> keys2(get_keys(p2->variables).begin(), get_keys(p2->variables).end());
if (keys1 != keys2)
{
- cemuLog_log(LogType::Force, "Graphic pack: \"{}\" contains inconsistent preset variables", m_name);
+ cemuLog_log(LogType::Force, "Graphic pack: \"{}\" contains inconsistent preset variables", GetNormalizedPathString());
throw std::exception();
}
if(p1->is_default)
{
if(has_default)
- cemuLog_log(LogType::Force, "Graphic pack: \"{}\" has more than one preset with the default key set for the same category \"{}\"", m_name, p1->name);
+ cemuLog_log(LogType::Force, "Graphic pack: \"{}\" has more than one preset with the default key set for the same category \"{}\"", GetNormalizedPathString(), p1->name);
p1->active = true;
has_default = true;
}
@@ -813,7 +821,7 @@ void GraphicPack2::AddConstantsForCurrentPreset(ExpressionParser& ep)
}
}
-void GraphicPack2::_iterateReplacedFiles(const fs::path& currentPath, bool isAOC)
+void GraphicPack2::_iterateReplacedFiles(const fs::path& currentPath, bool isAOC, const char* virtualMountBase)
{
uint64 currentTitleId = CafeSystem::GetForegroundTitleId();
uint64 aocTitleId = (currentTitleId & 0xFFFFFFFFull) | 0x0005000c00000000ull;
@@ -828,7 +836,7 @@ void GraphicPack2::_iterateReplacedFiles(const fs::path& currentPath, bool isAOC
}
else
{
- virtualMountPath = fs::path("vol/content/") / virtualMountPath;
+ virtualMountPath = fs::path(virtualMountBase) / virtualMountPath;
}
fscDeviceRedirect_add(virtualMountPath.generic_string(), it.file_size(), it.path().generic_string(), m_fs_priority);
}
@@ -853,7 +861,7 @@ void GraphicPack2::LoadReplacedFiles()
{
// setup redirections
fscDeviceRedirect_map();
- _iterateReplacedFiles(contentPath, false);
+ _iterateReplacedFiles(contentPath, false, "vol/content/");
}
// /aoc/
fs::path aocPath(gfxPackPath);
@@ -866,7 +874,18 @@ void GraphicPack2::LoadReplacedFiles()
aocTitleId |= 0x0005000c00000000ULL;
// setup redirections
fscDeviceRedirect_map();
- _iterateReplacedFiles(aocPath, true);
+ _iterateReplacedFiles(aocPath, true, nullptr);
+ }
+
+ // /code/
+ fs::path codePath(gfxPackPath);
+ codePath.append("code");
+
+ if (fs::exists(codePath, ec))
+ {
+ // setup redirections
+ fscDeviceRedirect_map();
+ _iterateReplacedFiles(codePath, false, CafeSystem::GetInternalVirtualCodeFolder().c_str());
}
}
@@ -960,7 +979,7 @@ bool GraphicPack2::Activate()
auto option_upscale = rules.FindOption("upscaleMagFilter");
if(option_upscale && boost::iequals(*option_upscale, "NearestNeighbor"))
m_output_settings.upscale_filter = LatteTextureView::MagFilter::kNearestNeighbor;
- auto option_downscale = rules.FindOption("NearestNeighbor");
+ auto option_downscale = rules.FindOption("downscaleMinFilter");
if (option_downscale && boost::iequals(*option_downscale, "NearestNeighbor"))
m_output_settings.downscale_filter = LatteTextureView::MagFilter::kNearestNeighbor;
}
diff --git a/src/Cafe/GraphicPack/GraphicPack2.h b/src/Cafe/GraphicPack/GraphicPack2.h
index 9b6a86d4..fc9603cd 100644
--- a/src/Cafe/GraphicPack/GraphicPack2.h
+++ b/src/Cafe/GraphicPack/GraphicPack2.h
@@ -260,7 +260,7 @@ private:
CustomShader LoadShader(const fs::path& path, uint64 shader_base_hash, uint64 shader_aux_hash, GP_SHADER_TYPE shader_type) const;
void ApplyShaderPresets(std::string& shader_source) const;
void LoadReplacedFiles();
- void _iterateReplacedFiles(const fs::path& currentPath, bool isAOC);
+ void _iterateReplacedFiles(const fs::path& currentPath, bool isAOC, const char* virtualMountBase);
// ram mappings
std::vector<std::pair<MPTR, MPTR>> m_ramMappings;
diff --git a/src/Cafe/HW/Espresso/Debugger/Debugger.cpp b/src/Cafe/HW/Espresso/Debugger/Debugger.cpp
index e7369af6..e84c9fda 100644
--- a/src/Cafe/HW/Espresso/Debugger/Debugger.cpp
+++ b/src/Cafe/HW/Espresso/Debugger/Debugger.cpp
@@ -8,6 +8,7 @@
#include "gui/debugger/DebuggerWindow2.h"
#include "Cafe/OS/libs/coreinit/coreinit.h"
+#include "util/helpers/helpers.h"
#if BOOST_OS_WINDOWS
#include <Windows.h>
@@ -136,11 +137,6 @@ void debugger_createCodeBreakpoint(uint32 address, uint8 bpType)
debugger_updateExecutionBreakpoint(address);
}
-void debugger_createExecuteBreakpoint(uint32 address)
-{
- debugger_createCodeBreakpoint(address, DEBUGGER_BP_T_NORMAL);
-}
-
namespace coreinit
{
std::vector<OSThread_t*>& OSGetSchedulerThreads();
@@ -294,8 +290,23 @@ void debugger_toggleExecuteBreakpoint(uint32 address)
}
else
{
- // create new breakpoint
- debugger_createExecuteBreakpoint(address);
+ // create new execution breakpoint
+ debugger_createCodeBreakpoint(address, DEBUGGER_BP_T_NORMAL);
+ }
+}
+
+void debugger_toggleLoggingBreakpoint(uint32 address)
+{
+ auto existingBP = debugger_getFirstBP(address, DEBUGGER_BP_T_LOGGING);
+ if (existingBP)
+ {
+ // delete existing breakpoint
+ debugger_deleteBreakpoint(existingBP);
+ }
+ else
+ {
+ // create new logging breakpoint
+ debugger_createCodeBreakpoint(address, DEBUGGER_BP_T_LOGGING);
}
}
@@ -447,6 +458,34 @@ bool debugger_hasPatch(uint32 address)
return false;
}
+void debugger_removePatch(uint32 address)
+{
+ for (sint32 i = 0; i < debuggerState.patches.size(); i++)
+ {
+ auto& patch = debuggerState.patches[i];
+ if (address < patch->address || address >= (patch->address + patch->length))
+ continue;
+ MPTR startAddress = patch->address;
+ MPTR endAddress = patch->address + patch->length;
+ // remove any breakpoints overlapping with the patch
+ for (auto& bp : debuggerState.breakpoints)
+ {
+ if (bp->address + 4 > startAddress && bp->address < endAddress)
+ {
+ bp->enabled = false;
+ debugger_updateExecutionBreakpoint(bp->address);
+ }
+ }
+ // restore original data
+ memcpy(MEMPTR(startAddress).GetPtr(), patch->origData.data(), patch->length);
+ PPCRecompiler_invalidateRange(startAddress, endAddress);
+ // remove patch
+ delete patch;
+ debuggerState.patches.erase(debuggerState.patches.begin() + i);
+ return;
+ }
+}
+
void debugger_stepInto(PPCInterpreter_t* hCPU, bool updateDebuggerWindow = true)
{
bool isRecEnabled = ppcRecompilerEnabled;
@@ -510,7 +549,48 @@ void debugger_enterTW(PPCInterpreter_t* hCPU)
{
if (bp->bpType == DEBUGGER_BP_T_LOGGING && bp->enabled)
{
- std::string logName = !bp->comment.empty() ? "Breakpoint '"+boost::nowide::narrow(bp->comment)+"'" : fmt::format("Breakpoint at 0x{:08X} (no comment)", bp->address);
+ std::string comment = !bp->comment.empty() ? boost::nowide::narrow(bp->comment) : fmt::format("Breakpoint at 0x{:08X} (no comment)", bp->address);
+
+ auto replacePlaceholders = [&](const std::string& prefix, const auto& formatFunc)
+ {
+ size_t pos = 0;
+ while ((pos = comment.find(prefix, pos)) != std::string::npos)
+ {
+ size_t endPos = comment.find('}', pos);
+ if (endPos == std::string::npos)
+ break;
+
+ try
+ {
+ if (int regNum = ConvertString<int>(comment.substr(pos + prefix.length(), endPos - pos - prefix.length())); regNum >= 0 && regNum < 32)
+ {
+ std::string replacement = formatFunc(regNum);
+ comment.replace(pos, endPos - pos + 1, replacement);
+ pos += replacement.length();
+ }
+ else
+ {
+ pos = endPos + 1;
+ }
+ }
+ catch (...)
+ {
+ pos = endPos + 1;
+ }
+ }
+ };
+
+ // Replace integer register placeholders {rX}
+ replacePlaceholders("{r", [&](int regNum) {
+ return fmt::format("0x{:08X}", hCPU->gpr[regNum]);
+ });
+
+ // Replace floating point register placeholders {fX}
+ replacePlaceholders("{f", [&](int regNum) {
+ return fmt::format("{}", hCPU->fpr[regNum].fpr);
+ });
+
+ std::string logName = "Breakpoint '" + comment + "'";
std::string logContext = fmt::format("Thread: {:08x} LR: 0x{:08x}", MEMPTR(coreinit::OSGetCurrentThread()).GetMPTR(), hCPU->spr.LR, cemuLog_advancedPPCLoggingEnabled() ? " Stack Trace:" : "");
cemuLog_log(LogType::Force, "[Debugger] {} was executed! {}", logName, logContext);
if (cemuLog_advancedPPCLoggingEnabled())
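A reduced sketch of the placeholder expansion added above, covering a single "{rN}" token; ExpandGprPlaceholder is an illustrative helper and the error handling of the real lambda is omitted:

#include <cstdint>
#include <string>
#include <fmt/format.h>

// Expand one "{rN}" token in a breakpoint comment to the current value of GPR N,
// e.g. "r3={r3}" -> "r3=0x00000010". The real implementation loops over all matches
// and handles "{fN}" for FPRs the same way.
std::string ExpandGprPlaceholder(const std::string& comment, const uint32_t gpr[32])
{
    size_t pos = comment.find("{r");
    if (pos == std::string::npos)
        return comment;
    size_t end = comment.find('}', pos);
    if (end == std::string::npos)
        return comment;
    int regNum = std::stoi(comment.substr(pos + 2, end - pos - 2)); // may throw on non-numeric input
    if (regNum < 0 || regNum > 31)
        return comment;
    std::string result = comment;
    result.replace(pos, end - pos + 1, fmt::format("0x{:08X}", gpr[regNum]));
    return result;
}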
@@ -547,7 +627,7 @@ void debugger_enterTW(PPCInterpreter_t* hCPU)
debuggerState.debugSession.stepInto = false;
debuggerState.debugSession.stepOver = false;
debuggerState.debugSession.run = false;
- while (true)
+ while (debuggerState.debugSession.isTrapped)
{
std::this_thread::sleep_for(std::chrono::milliseconds(1));
// check for step commands
diff --git a/src/Cafe/HW/Espresso/Debugger/Debugger.h b/src/Cafe/HW/Espresso/Debugger/Debugger.h
index 717df28a..c220eb8a 100644
--- a/src/Cafe/HW/Espresso/Debugger/Debugger.h
+++ b/src/Cafe/HW/Espresso/Debugger/Debugger.h
@@ -100,8 +100,8 @@ extern debuggerState_t debuggerState;
// new API
DebuggerBreakpoint* debugger_getFirstBP(uint32 address);
void debugger_createCodeBreakpoint(uint32 address, uint8 bpType);
-void debugger_createExecuteBreakpoint(uint32 address);
void debugger_toggleExecuteBreakpoint(uint32 address); // create/remove execute breakpoint
+void debugger_toggleLoggingBreakpoint(uint32 address); // create/remove logging breakpoint
void debugger_toggleBreakpoint(uint32 address, bool state, DebuggerBreakpoint* bp);
void debugger_createMemoryBreakpoint(uint32 address, bool onRead, bool onWrite);
@@ -114,6 +114,7 @@ void debugger_updateExecutionBreakpoint(uint32 address, bool forceRestore = fals
void debugger_createPatch(uint32 address, std::span patchData);
bool debugger_hasPatch(uint32 address);
+void debugger_removePatch(uint32 address);
void debugger_forceBreak(); // force breakpoint at the next possible instruction
bool debugger_isTrapped();
diff --git a/src/Cafe/HW/Espresso/EspressoISA.h b/src/Cafe/HW/Espresso/EspressoISA.h
index b3ae45c3..5e09763b 100644
--- a/src/Cafe/HW/Espresso/EspressoISA.h
+++ b/src/Cafe/HW/Espresso/EspressoISA.h
@@ -10,6 +10,18 @@ namespace Espresso
CR_BIT_INDEX_SO = 3,
};
+ enum class PSQ_LOAD_TYPE
+ {
+ TYPE_F32 = 0,
+ TYPE_UNUSED1 = 1,
+ TYPE_UNUSED2 = 2,
+ TYPE_UNUSED3 = 3,
+ TYPE_U8 = 4,
+ TYPE_U16 = 5,
+ TYPE_S8 = 6,
+ TYPE_S16 = 7,
+ };
+
enum class PrimaryOpcode
{
// underscore at the end of the name means that this instruction always updates CR0 (as if RC bit is set)
@@ -91,13 +103,15 @@ namespace Espresso
BCCTR = 528
};
- enum class OPCODE_31
+ enum class Opcode31
{
-
+ TW = 4,
+ MFTB = 371,
};
inline PrimaryOpcode GetPrimaryOpcode(uint32 opcode) { return (PrimaryOpcode)(opcode >> 26); };
inline Opcode19 GetGroup19Opcode(uint32 opcode) { return (Opcode19)((opcode >> 1) & 0x3FF); };
+ inline Opcode31 GetGroup31Opcode(uint32 opcode) { return (Opcode31)((opcode >> 1) & 0x3FF); };
struct BOField
{
@@ -132,6 +146,12 @@ namespace Espresso
uint8 bo;
};
+ // returns true if LK bit is set, only valid for branch instructions
+ inline bool DecodeLK(uint32 opcode)
+ {
+ return (opcode & 1) != 0;
+ }
+
inline void _decodeForm_I(uint32 opcode, uint32& LI, bool& AA, bool& LK)
{
LI = opcode & 0x3fffffc;
@@ -183,13 +203,7 @@ namespace Espresso
_decodeForm_D_branch(opcode, BD, BO, BI, AA, LK);
}
- inline void decodeOp_BCLR(uint32 opcode, BOField& BO, uint32& BI, bool& LK)
- {
- // form XL (with BD field expected to be zero)
- _decodeForm_XL(opcode, BO, BI, LK);
- }
-
- inline void decodeOp_BCCTR(uint32 opcode, BOField& BO, uint32& BI, bool& LK)
+ inline void decodeOp_BCSPR(uint32 opcode, BOField& BO, uint32& BI, bool& LK) // BCLR and BCCTR
{
// form XL (with BD field expected to be zero)
_decodeForm_XL(opcode, BO, BI, LK);
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp
index fe9316f0..2fe07509 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp
@@ -3,12 +3,12 @@ static void PPCInterpreter_setXerOV(PPCInterpreter_t* hCPU, bool hasOverflow)
{
if (hasOverflow)
{
- hCPU->spr.XER |= XER_SO;
- hCPU->spr.XER |= XER_OV;
+ hCPU->xer_so = 1;
+ hCPU->xer_ov = 1;
}
else
{
- hCPU->spr.XER &= ~XER_OV;
+ hCPU->xer_ov = 0;
}
}
@@ -41,7 +41,7 @@ static void PPCInterpreter_ADD(PPCInterpreter_t* hCPU, uint32 opcode)
static void PPCInterpreter_ADDO(PPCInterpreter_t* hCPU, uint32 opcode)
{
- // untested (Don't Starve Giant Edition uses this instruction + BSO)
+ // Don't Starve Giant Edition uses this instruction + BSO
PPC_OPC_TEMPL3_XO();
uint32 result = hCPU->gpr[rA] + hCPU->gpr[rB];
PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(hCPU->gpr[rA], hCPU->gpr[rB], result));
@@ -113,7 +113,6 @@ static void PPCInterpreter_ADDEO(PPCInterpreter_t* hCPU, uint32 opcode)
else
hCPU->xer_ca = 0;
PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(a, b, hCPU->gpr[rD]));
- // update CR
if (opHasRC())
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU);
@@ -130,7 +129,7 @@ static void PPCInterpreter_ADDI(PPCInterpreter_t* hCPU, uint32 opcode)
static void PPCInterpreter_ADDIC(PPCInterpreter_t* hCPU, uint32 opcode)
{
- int rD, rA;
+ sint32 rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
uint32 a = hCPU->gpr[rA];
@@ -145,7 +144,7 @@ static void PPCInterpreter_ADDIC(PPCInterpreter_t* hCPU, uint32 opcode)
static void PPCInterpreter_ADDIC_(PPCInterpreter_t* hCPU, uint32 opcode)
{
- int rD, rA;
+ sint32 rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
uint32 a = hCPU->gpr[rA];
@@ -155,14 +154,13 @@ static void PPCInterpreter_ADDIC_(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->xer_ca = 1;
else
hCPU->xer_ca = 0;
- // update cr0 flags
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU);
}
static void PPCInterpreter_ADDIS(PPCInterpreter_t* hCPU, uint32 opcode)
{
- int rD, rA;
+ sint32 rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_Shift16(opcode, rD, rA, imm);
hCPU->gpr[rD] = (rA ? hCPU->gpr[rA] : 0) + imm;
@@ -185,6 +183,23 @@ static void PPCInterpreter_ADDZE(PPCInterpreter_t* hCPU, uint32 opcode)
PPCInterpreter_nextInstruction(hCPU);
}
+static void PPCInterpreter_ADDZEO(PPCInterpreter_t* hCPU, uint32 opcode)
+{
+ PPC_OPC_TEMPL3_XO();
+ PPC_ASSERT(rB == 0);
+ uint32 a = hCPU->gpr[rA];
+ uint32 ca = hCPU->xer_ca;
+ hCPU->gpr[rD] = a + ca;
+ PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(a, 0, hCPU->gpr[rD]));
+ if ((a == 0xffffffff) && ca)
+ hCPU->xer_ca = 1;
+ else
+ hCPU->xer_ca = 0;
+ if (opHasRC())
+ ppc_update_cr0(hCPU, hCPU->gpr[rD]);
+ PPCInterpreter_nextInstruction(hCPU);
+}
+
static void PPCInterpreter_ADDME(PPCInterpreter_t* hCPU, uint32 opcode)
{
PPC_OPC_TEMPL3_XO();
@@ -201,6 +216,23 @@ static void PPCInterpreter_ADDME(PPCInterpreter_t* hCPU, uint32 opcode)
PPCInterpreter_nextInstruction(hCPU);
}
+static void PPCInterpreter_ADDMEO(PPCInterpreter_t* hCPU, uint32 opcode)
+{
+ PPC_OPC_TEMPL3_XO();
+ PPC_ASSERT(rB == 0);
+ uint32 a = hCPU->gpr[rA];
+ uint32 ca = hCPU->xer_ca;
+ hCPU->gpr[rD] = a + ca + 0xffffffff;
+ PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(a, 0xffffffff, hCPU->gpr[rD]));
+ if (a || ca)
+ hCPU->xer_ca = 1;
+ else
+ hCPU->xer_ca = 0;
+ if (opHasRC())
+ ppc_update_cr0(hCPU, hCPU->gpr[rD]);
+ PPCInterpreter_nextInstruction(hCPU);
+}
+
static void PPCInterpreter_SUBF(PPCInterpreter_t* hCPU, uint32 opcode)
{
PPC_OPC_TEMPL3_XO();
@@ -246,7 +278,7 @@ static void PPCInterpreter_SUBFCO(PPCInterpreter_t* hCPU, uint32 opcode)
uint32 a = hCPU->gpr[rA];
uint32 b = hCPU->gpr[rB];
hCPU->gpr[rD] = ~a + b + 1;
- // update xer
+ // update carry
if (ppc_carry_3(~a, b, 1))
hCPU->xer_ca = 1;
else
@@ -260,7 +292,7 @@ static void PPCInterpreter_SUBFCO(PPCInterpreter_t* hCPU, uint32 opcode)
static void PPCInterpreter_SUBFIC(PPCInterpreter_t* hCPU, uint32 opcode)
{
- int rD, rA;
+ sint32 rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
uint32 a = hCPU->gpr[rA];
@@ -284,7 +316,6 @@ static void PPCInterpreter_SUBFE(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->xer_ca = 1;
else
hCPU->xer_ca = 0;
- // update cr0
if (opHasRC())
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU);
@@ -304,7 +335,6 @@ static void PPCInterpreter_SUBFEO(PPCInterpreter_t* hCPU, uint32 opcode)
else
hCPU->xer_ca = 0;
PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~a, b, result));
- // update cr0
if (opHasRC())
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU);
@@ -326,9 +356,25 @@ static void PPCInterpreter_SUBFZE(PPCInterpreter_t* hCPU, uint32 opcode)
PPCInterpreter_nextInstruction(hCPU);
}
+static void PPCInterpreter_SUBFZEO(PPCInterpreter_t* hCPU, uint32 opcode)
+{
+ PPC_OPC_TEMPL3_XO();
+ PPC_ASSERT(rB == 0);
+ uint32 a = hCPU->gpr[rA];
+ uint32 ca = hCPU->xer_ca;
+ hCPU->gpr[rD] = ~a + ca;
+ PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~a, 0, hCPU->gpr[rD]));
+ if (a == 0 && ca)
+ hCPU->xer_ca = 1;
+ else
+ hCPU->xer_ca = 0;
+ if (opHasRC())
+ ppc_update_cr0(hCPU, hCPU->gpr[rD]);
+ PPCInterpreter_nextInstruction(hCPU);
+}
+
static void PPCInterpreter_SUBFME(PPCInterpreter_t* hCPU, uint32 opcode)
{
- // untested
PPC_OPC_TEMPL3_XO();
PPC_ASSERT(rB == 0);
uint32 a = hCPU->gpr[rA];
@@ -339,7 +385,24 @@ static void PPCInterpreter_SUBFME(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->xer_ca = 1;
else
hCPU->xer_ca = 0;
- // update cr0
+ if (opcode & PPC_OPC_RC)
+ ppc_update_cr0(hCPU, hCPU->gpr[rD]);
+ PPCInterpreter_nextInstruction(hCPU);
+}
+
+static void PPCInterpreter_SUBFMEO(PPCInterpreter_t* hCPU, uint32 opcode)
+{
+ PPC_OPC_TEMPL3_XO();
+ PPC_ASSERT(rB == 0);
+ uint32 a = hCPU->gpr[rA];
+ uint32 ca = hCPU->xer_ca;
+ hCPU->gpr[rD] = ~a + 0xFFFFFFFF + ca;
+ PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~a, 0xFFFFFFFF, hCPU->gpr[rD]));
+ // update xer carry
+ if (ppc_carry_3(~a, 0xFFFFFFFF, ca))
+ hCPU->xer_ca = 1;
+ else
+ hCPU->xer_ca = 0;
if (opcode & PPC_OPC_RC)
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU);
@@ -352,13 +415,8 @@ static void PPCInterpreter_MULHW_(PPCInterpreter_t* hCPU, uint32 opcode)
sint64 b = (sint32)hCPU->gpr[rB];
sint64 c = a * b;
hCPU->gpr[rD] = ((uint64)c) >> 32;
- if (opcode & PPC_OPC_RC) {
- // update cr0 flags
-#ifdef CEMU_DEBUG_ASSERT
- assert_dbg();
-#endif
+ if (opHasRC())
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
- }
PPCInterpreter_nextInstruction(hCPU);
}
@@ -409,14 +467,14 @@ static void PPCInterpreter_MULLI(PPCInterpreter_t* hCPU, uint32 opcode)
static void PPCInterpreter_DIVW(PPCInterpreter_t* hCPU, uint32 opcode)
{
PPC_OPC_TEMPL3_XO();
- sint32 a = hCPU->gpr[rA];
- sint32 b = hCPU->gpr[rB];
+ sint32 a = (sint32)hCPU->gpr[rA];
+ sint32 b = (sint32)hCPU->gpr[rB];
if (b == 0)
- {
- cemuLog_logDebug(LogType::Force, "Error: Division by zero! [{:08x}]", (uint32)hCPU->instructionPointer);
- b++;
- }
- hCPU->gpr[rD] = a / b;
+ hCPU->gpr[rD] = a < 0 ? 0xFFFFFFFF : 0;
+ else if (a == 0x80000000 && b == 0xFFFFFFFF)
+ hCPU->gpr[rD] = 0xFFFFFFFF;
+ else
+ hCPU->gpr[rD] = a / b;
if (opHasRC())
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU);
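Summarized as a standalone sketch, the edge-case behavior the reworked DIVW/DIVWO paths now return for the two cases the PowerPC architecture leaves undefined; EmulatedDivw is an illustrative name only:

#include <cstdint>

// Division by zero yields all-ones for a negative dividend (else zero), and
// INT_MIN / -1 yields all-ones instead of overflowing.
uint32_t EmulatedDivw(uint32_t ra, uint32_t rb)
{
    int32_t a = static_cast<int32_t>(ra);
    int32_t b = static_cast<int32_t>(rb);
    if (b == 0)
        return a < 0 ? 0xFFFFFFFFu : 0u;
    if (ra == 0x80000000u && rb == 0xFFFFFFFFu)
        return 0xFFFFFFFFu;
    return static_cast<uint32_t>(a / b);
}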
@@ -425,16 +483,23 @@ static void PPCInterpreter_DIVW(PPCInterpreter_t* hCPU, uint32 opcode)
static void PPCInterpreter_DIVWO(PPCInterpreter_t* hCPU, uint32 opcode)
{
PPC_OPC_TEMPL3_XO();
- sint32 a = hCPU->gpr[rA];
- sint32 b = hCPU->gpr[rB];
+ sint32 a = (sint32)hCPU->gpr[rA];
+ sint32 b = (sint32)hCPU->gpr[rB];
if (b == 0)
{
PPCInterpreter_setXerOV(hCPU, true);
- PPCInterpreter_nextInstruction(hCPU);
- return;
+ hCPU->gpr[rD] = a < 0 ? 0xFFFFFFFF : 0;
+ }
+ else if(a == 0x80000000 && b == 0xFFFFFFFF)
+ {
+ PPCInterpreter_setXerOV(hCPU, true);
+ hCPU->gpr[rD] = 0xFFFFFFFF;
+ }
+ else
+ {
+ hCPU->gpr[rD] = a / b;
+ PPCInterpreter_setXerOV(hCPU, false);
}
- hCPU->gpr[rD] = a / b;
- PPCInterpreter_setXerOV(hCPU, false);
if (opHasRC())
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU);
@@ -443,12 +508,14 @@ static void PPCInterpreter_DIVWO(PPCInterpreter_t* hCPU, uint32 opcode)
static void PPCInterpreter_DIVWU(PPCInterpreter_t* hCPU, uint32 opcode)
{
PPC_OPC_TEMPL3_XO();
- if (hCPU->gpr[rB] == 0)
- {
- PPCInterpreter_nextInstruction(hCPU);
- return;
- }
- hCPU->gpr[rD] = hCPU->gpr[rA] / hCPU->gpr[rB];
+ uint32 a = hCPU->gpr[rA];
+ uint32 b = hCPU->gpr[rB];
+ if (b == 0)
+ hCPU->gpr[rD] = 0;
+ else if (a == 0x80000000 && b == 0xFFFFFFFF)
+ hCPU->gpr[rD] = 0;
+ else
+ hCPU->gpr[rD] = a / b;
if (opHasRC())
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU);
@@ -457,14 +524,23 @@ static void PPCInterpreter_DIVWU(PPCInterpreter_t* hCPU, uint32 opcode)
static void PPCInterpreter_DIVWUO(PPCInterpreter_t* hCPU, uint32 opcode)
{
PPC_OPC_TEMPL3_XO();
- if (hCPU->gpr[rB] == 0)
+ uint32 a = hCPU->gpr[rA];
+ uint32 b = hCPU->gpr[rB];
+ if (b == 0)
{
PPCInterpreter_setXerOV(hCPU, true);
- PPCInterpreter_nextInstruction(hCPU);
- return;
+ hCPU->gpr[rD] = 0;
+ }
+ else if(a == 0x80000000 && b == 0xFFFFFFFF)
+ {
+ PPCInterpreter_setXerOV(hCPU, false);
+ hCPU->gpr[rD] = 0;
+ }
+ else
+ {
+ hCPU->gpr[rD] = a / b;
+ PPCInterpreter_setXerOV(hCPU, false);
}
- hCPU->gpr[rD] = hCPU->gpr[rA] / hCPU->gpr[rB];
- PPCInterpreter_setXerOV(hCPU, false);
if (opHasRC())
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU);
@@ -491,6 +567,13 @@ static void PPCInterpreter_CRANDC(PPCInterpreter_t* hCPU, uint32 opcode)
PPCInterpreter_nextInstruction(hCPU);
}
+static void PPCInterpreter_CRNAND(PPCInterpreter_t* hCPU, uint32 opcode)
+{
+ PPC_OPC_TEMPL_X_CR();
+ ppc_setCRBit(hCPU, crD, (ppc_getCRBit(hCPU, crA)&ppc_getCRBit(hCPU, crB)) ^ 1);
+ PPCInterpreter_nextInstruction(hCPU);
+}
+
static void PPCInterpreter_CROR(PPCInterpreter_t* hCPU, uint32 opcode)
{
PPC_OPC_TEMPL_X_CR();
@@ -848,8 +931,7 @@ static void PPCInterpreter_CMP(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
else
hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
- if ((hCPU->spr.XER & XER_SO) != 0)
- hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
+ hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
PPCInterpreter_nextInstruction(hCPU);
}
@@ -871,8 +953,7 @@ static void PPCInterpreter_CMPL(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
else
hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
- if ((hCPU->spr.XER & XER_SO) != 0)
- hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
+ hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
PPCInterpreter_nextInstruction(hCPU);
}
@@ -895,8 +976,7 @@ static void PPCInterpreter_CMPI(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
else
hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
- if (hCPU->spr.XER & XER_SO)
- hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
+ hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
PPCInterpreter_nextInstruction(hCPU);
}
@@ -919,8 +999,7 @@ static void PPCInterpreter_CMPLI(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
else
hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
- if (hCPU->spr.XER & XER_SO)
- hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
+ hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
PPCInterpreter_nextInstruction(hCPU);
}
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp
index aed571d7..2c99b84c 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp
@@ -32,7 +32,7 @@ espresso_frsqrte_entry_t frsqrteLookupTable[32] =
{0x20c1000, 0x35e},{0x1f12000, 0x332},{0x1d79000, 0x30a},{0x1bf4000, 0x2e6},
};
-double frsqrte_espresso(double input)
+ATTR_MS_ABI double frsqrte_espresso(double input)
{
unsigned long long x = *(unsigned long long*)&input;
@@ -111,7 +111,7 @@ espresso_fres_entry_t fresLookupTable[32] =
{0x88400, 0x11a}, {0x65000, 0x11a}, {0x41c00, 0x108}, {0x20c00, 0x106}
};
-double fres_espresso(double input)
+ATTR_MS_ABI double fres_espresso(double input)
{
// based on testing we know that fres uses only the first 15 bits of the mantissa
// seee eeee eeee mmmm mmmm mmmm mmmx xxxx .... (s = sign, e = exponent, m = mantissa, x = not used)
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterHLE.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterHLE.cpp
index 24219e66..cf7ba195 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterHLE.cpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterHLE.cpp
@@ -2,62 +2,70 @@
#include "PPCInterpreterInternal.h"
#include "PPCInterpreterHelper.h"
-std::unordered_set<std::string> sUnsupportedHLECalls;
+std::unordered_set<std::string> s_unsupportedHLECalls;
void PPCInterpreter_handleUnsupportedHLECall(PPCInterpreter_t* hCPU)
{
const char* libFuncName = (char*)memory_getPointerFromVirtualOffset(hCPU->instructionPointer + 8);
std::string tempString = fmt::format("Unsupported lib call: {}", libFuncName);
- if (sUnsupportedHLECalls.find(tempString) == sUnsupportedHLECalls.end())
+ if (s_unsupportedHLECalls.find(tempString) == s_unsupportedHLECalls.end())
{
cemuLog_log(LogType::UnsupportedAPI, "{}", tempString);
- sUnsupportedHLECalls.emplace(tempString);
+ s_unsupportedHLECalls.emplace(tempString);
}
hCPU->gpr[3] = 0;
PPCInterpreter_nextInstruction(hCPU);
}
-std::vector<HLECALL>* sPPCHLETable{};
+static constexpr size_t HLE_TABLE_CAPACITY = 0x4000;
+HLECALL s_ppcHleTable[HLE_TABLE_CAPACITY]{};
+sint32 s_ppcHleTableWriteIndex = 0;
+std::mutex s_ppcHleTableMutex;
HLEIDX PPCInterpreter_registerHLECall(HLECALL hleCall, std::string hleName)
{
- if (!sPPCHLETable)
- sPPCHLETable = new std::vector<HLECALL>();
- for (sint32 i = 0; i < sPPCHLETable->size(); i++)
+ std::unique_lock _l(s_ppcHleTableMutex);
+ if (s_ppcHleTableWriteIndex >= HLE_TABLE_CAPACITY)
{
- if ((*sPPCHLETable)[i] == hleCall)
- return i;
+ cemuLog_log(LogType::Force, "HLE table is full");
+ cemu_assert(false);
}
- HLEIDX newFuncIndex = (sint32)sPPCHLETable->size();
- sPPCHLETable->resize(sPPCHLETable->size() + 1);
- (*sPPCHLETable)[newFuncIndex] = hleCall;
- return newFuncIndex;
+ for (sint32 i = 0; i < s_ppcHleTableWriteIndex; i++)
+ {
+ if (s_ppcHleTable[i] == hleCall)
+ {
+ return i;
+ }
+ }
+ cemu_assert(s_ppcHleTableWriteIndex < HLE_TABLE_CAPACITY);
+ s_ppcHleTable[s_ppcHleTableWriteIndex] = hleCall;
+ HLEIDX funcIndex = s_ppcHleTableWriteIndex;
+ s_ppcHleTableWriteIndex++;
+ return funcIndex;
}
HLECALL PPCInterpreter_getHLECall(HLEIDX funcIndex)
{
- if (funcIndex < 0 || funcIndex >= sPPCHLETable->size())
+ if (funcIndex < 0 || funcIndex >= HLE_TABLE_CAPACITY)
return nullptr;
- return sPPCHLETable->data()[funcIndex];
+ return s_ppcHleTable[funcIndex];
}
-std::mutex g_hleLogMutex;
+std::mutex s_hleLogMutex;
void PPCInterpreter_virtualHLE(PPCInterpreter_t* hCPU, unsigned int opcode)
{
uint32 hleFuncId = opcode & 0xFFFF;
- if (hleFuncId == 0xFFD0)
+ if (hleFuncId == 0xFFD0) [[unlikely]]
{
- g_hleLogMutex.lock();
+ s_hleLogMutex.lock();
PPCInterpreter_handleUnsupportedHLECall(hCPU);
- g_hleLogMutex.unlock();
- return;
+ s_hleLogMutex.unlock();
}
else
{
// os lib function
- cemu_assert(hleFuncId < sPPCHLETable->size());
- auto hleCall = (*sPPCHLETable)[hleFuncId];
+ auto hleCall = PPCInterpreter_getHLECall(hleFuncId);
cemu_assert(hleCall);
hleCall(hCPU);
}
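As a small reference for the dispatch above, the index encoding the virtual HLE instruction relies on; ExtractHleIndex is an illustrative helper, not part of the change:

#include <cstdint>

// The "virtual HLE" instruction encodes the HLE table index in its low 16 bits;
// index 0xFFD0 is reserved for the unsupported-lib-call handler.
constexpr uint16_t ExtractHleIndex(uint32_t opcode) { return static_cast<uint16_t>(opcode & 0xFFFF); }
static_assert(ExtractHleIndex(0x0100FFD0u) == 0xFFD0, "reserved index for unsupported calls");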
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterImpl.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterImpl.cpp
index cacfa4a9..547472ab 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterImpl.cpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterImpl.cpp
@@ -428,9 +428,6 @@ public:
}
};
-uint32 testIP[100];
-uint32 testIPC = 0;
-
template
class PPCInterpreterContainer
{
@@ -466,6 +463,10 @@ public:
case 1: // virtual HLE
PPCInterpreter_virtualHLE(hCPU, opcode);
break;
+ case 3:
+ cemuLog_logDebug(LogType::Force, "Unsupported TWI instruction executed at {:08x}", hCPU->instructionPointer);
+ PPCInterpreter_nextInstruction(hCPU);
+ break;
case 4:
switch (PPC_getBits(opcode, 30, 5))
{
@@ -482,8 +483,9 @@ public:
PPCInterpreter_PS_CMPU1(hCPU, opcode);
break;
default:
- debug_printf("Unknown execute %04X as [4->0] at %08X\n", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer);
+ cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [4->0] at {:08x}", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer);
cemu_assert_unimplemented();
+ hCPU->instructionPointer += 4;
break;
}
break;
@@ -509,8 +511,9 @@ public:
PPCInterpreter_PS_ABS(hCPU, opcode);
break;
default:
- debug_printf("Unknown execute %04X as [4->8] at %08X\n", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer);
+ cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [4->8] at {:08x}", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer);
cemu_assert_unimplemented();
+ hCPU->instructionPointer += 4;
break;
}
break;
@@ -548,8 +551,9 @@ public:
PPCInterpreter_PS_MERGE11(hCPU, opcode);
break;
default:
- debug_printf("Unknown execute %04X as [4->16] at %08X\n", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer);
- debugBreakpoint();
+ cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [4->16] at {:08x}", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer);
+ cemu_assert_unimplemented();
+ hCPU->instructionPointer += 4;
break;
}
break;
@@ -590,8 +594,9 @@ public:
PPCInterpreter_PS_NMADD(hCPU, opcode);
break;
default:
- debug_printf("Unknown execute %04X as [4] at %08X\n", PPC_getBits(opcode, 30, 5), hCPU->instructionPointer);
+ cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [4] at {:08x}", PPC_getBits(opcode, 30, 5), hCPU->instructionPointer);
cemu_assert_unimplemented();
+ hCPU->instructionPointer += 4;
break;
}
break;
@@ -623,12 +628,15 @@ public:
PPCInterpreter_BCX(hCPU, opcode);
break;
case 17:
- if (PPC_getBits(opcode, 30, 1) == 1) {
+ if (PPC_getBits(opcode, 30, 1) == 1)
+ {
PPCInterpreter_SC(hCPU, opcode);
}
- else {
- debug_printf("Unsupported Opcode [0x17 --> 0x0]\n");
+ else
+ {
+ cemuLog_logDebug(LogType::Force, "Unsupported Opcode [0x17 --> 0x0]");
cemu_assert_unimplemented();
+ hCPU->instructionPointer += 4;
}
break;
case 18:
@@ -658,6 +666,9 @@ public:
case 193:
PPCInterpreter_CRXOR(hCPU, opcode);
break;
+ case 225:
+ PPCInterpreter_CRNAND(hCPU, opcode);
+ break;
case 257:
PPCInterpreter_CRAND(hCPU, opcode);
break;
@@ -674,8 +685,9 @@ public:
PPCInterpreter_BCCTR(hCPU, opcode);
break;
default:
- debug_printf("Unknown execute %04X as [19] at %08X\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
+ cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [19] at {:08x}\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
cemu_assert_unimplemented();
+ hCPU->instructionPointer += 4;
break;
}
break;
@@ -713,9 +725,6 @@ public:
PPCInterpreter_CMP(hCPU, opcode);
break;
case 4:
- #ifdef CEMU_DEBUG_ASSERT
- debug_printf("TW instruction executed at %08x\n", hCPU->instructionPointer);
- #endif
PPCInterpreter_TW(hCPU, opcode);
break;
case 8:
@@ -895,6 +904,12 @@ public:
case 522:
PPCInterpreter_ADDCO(hCPU, opcode);
break;
+ case 523: // 11 | OE
+ PPCInterpreter_MULHWU_(hCPU, opcode); // OE is ignored
+ break;
+ case 533:
+ PPCInterpreter_LSWX(hCPU, opcode);
+ break;
case 534:
PPCInterpreter_LWBRX(hCPU, opcode);
break;
@@ -913,6 +928,9 @@ public:
case 567:
PPCInterpreter_LFSUX(hCPU, opcode);
break;
+ case 587: // 75 | OE
+ PPCInterpreter_MULHW_(hCPU, opcode); // OE is ignored for MULHW
+ break;
case 595:
PPCInterpreter_MFSR(hCPU, opcode);
break;
@@ -943,15 +961,30 @@ public:
case 663:
PPCInterpreter_STFSX(hCPU, opcode);
break;
+ case 661:
+ PPCInterpreter_STSWX(hCPU, opcode);
+ break;
case 695:
PPCInterpreter_STFSUX(hCPU, opcode);
break;
+ case 712: // 200 | OE
+ PPCInterpreter_SUBFZEO(hCPU, opcode);
+ break;
+ case 714: // 202 | OE
+ PPCInterpreter_ADDZEO(hCPU, opcode);
+ break;
case 725:
PPCInterpreter_STSWI(hCPU, opcode);
break;
case 727:
PPCInterpreter_STFDX(hCPU, opcode);
break;
+ case 744: // 232 | OE
+ PPCInterpreter_SUBFMEO(hCPU, opcode);
+ break;
+ case 746: // 234 | OE
+ PPCInterpreter_ADDMEO(hCPU, opcode);
+ break;
case 747:
PPCInterpreter_MULLWO(hCPU, opcode);
break;
@@ -998,10 +1031,8 @@ public:
PPCInterpreter_DCBZ(hCPU, opcode);
break;
default:
- debug_printf("Unknown execute %04X as [31] at %08X\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
- #ifdef CEMU_DEBUG_ASSERT
- assert_dbg();
- #endif
+ cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [31] at {:08x}\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
+ cemu_assert_unimplemented();
hCPU->instructionPointer += 4;
break;
}
@@ -1084,7 +1115,7 @@ public:
case 57:
PPCInterpreter_PSQ_LU(hCPU, opcode);
break;
- case 59: //Opcode category
+ case 59: // opcode category
switch (PPC_getBits(opcode, 30, 5))
{
case 18:
@@ -1115,8 +1146,9 @@ public:
PPCInterpreter_FNMADDS(hCPU, opcode);
break;
default:
- debug_printf("Unknown execute %04X as [59] at %08X\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
+ cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [59] at {:08x}\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
cemu_assert_unimplemented();
+ hCPU->instructionPointer += 4;
break;
}
break;
@@ -1195,18 +1227,19 @@ public:
case 583:
PPCInterpreter_MFFS(hCPU, opcode);
break;
- case 711: // IBM documentation has this wrong as 771?
+ case 711:
PPCInterpreter_MTFSF(hCPU, opcode);
break;
default:
- debug_printf("Unknown execute %04X as [63] at %08X\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
+ cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [63] at {:08x}\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
cemu_assert_unimplemented();
+ PPCInterpreter_nextInstruction(hCPU);
break;
}
}
break;
default:
- debug_printf("Unknown execute %04X at %08X\n", PPC_getBits(opcode, 5, 6), (unsigned int)hCPU->instructionPointer);
+ cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} at {:08x}\n", PPC_getBits(opcode, 5, 6), (unsigned int)hCPU->instructionPointer);
cemu_assert_unimplemented();
}
}
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h
index bc8458d9..896fd21c 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h
@@ -50,9 +50,9 @@
#define CR_BIT_EQ 2
#define CR_BIT_SO 3
-#define XER_SO (1<<31) // summary overflow bit
-#define XER_OV (1<<30) // overflow bit
#define XER_BIT_CA (29) // carry bit index. To accelerate frequent access, this bit is stored as a separate uint8
+#define XER_BIT_SO (31) // summary overflow, counterpart to CR SO
+#define XER_BIT_OV (30)
// FPSCR
#define FPSCR_VXSNAN (1<<24)
@@ -118,7 +118,8 @@
static inline void ppc_update_cr0(PPCInterpreter_t* hCPU, uint32 r)
{
- hCPU->cr[CR_BIT_SO] = (hCPU->spr.XER&XER_SO) ? 1 : 0;
+ cemu_assert_debug(hCPU->xer_so <= 1);
+ hCPU->cr[CR_BIT_SO] = hCPU->xer_so;
hCPU->cr[CR_BIT_LT] = ((r != 0) ? 1 : 0) & ((r & 0x80000000) ? 1 : 0);
hCPU->cr[CR_BIT_EQ] = (r == 0);
hCPU->cr[CR_BIT_GT] = hCPU->cr[CR_BIT_EQ] ^ hCPU->cr[CR_BIT_LT] ^ 1; // this works because EQ and LT can never be set at the same time. So the only case where GT becomes 1 is when LT=0 and EQ=0
@@ -190,8 +191,8 @@ inline double roundTo25BitAccuracy(double d)
return *(double*)&v;
}
-double fres_espresso(double input);
-double frsqrte_espresso(double input);
+ATTR_MS_ABI double fres_espresso(double input);
+ATTR_MS_ABI double frsqrte_espresso(double input);
void fcmpu_espresso(PPCInterpreter_t* hCPU, int crfD, double a, double b);
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp
index 694e05e6..ea7bb038 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp
@@ -31,7 +31,7 @@ static void PPCInterpreter_STW(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STWU(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- int rA, rS;
+ sint32 rA, rS;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm);
ppcItpCtrl::ppcMem_writeDataU32(hCPU, hCPU->gpr[rA] + imm, hCPU->gpr[rS]);
@@ -42,7 +42,7 @@ static void PPCInterpreter_STWU(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STWX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- int rA, rS, rB;
+ sint32 rA, rS, rB;
PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
ppcItpCtrl::ppcMem_writeDataU32(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], hCPU->gpr[rS]);
PPCInterpreter_nextInstruction(hCPU);
@@ -85,7 +85,8 @@ static void PPCInterpreter_STWCX(PPCInterpreter_t* hCPU, uint32 Opcode)
ppc_setCRBit(hCPU, CR_BIT_GT, 0);
ppc_setCRBit(hCPU, CR_BIT_EQ, 1);
}
- ppc_setCRBit(hCPU, CR_BIT_SO, (hCPU->spr.XER&XER_SO) != 0 ? 1 : 0);
+ cemu_assert_debug(hCPU->xer_so <= 1);
+ ppc_setCRBit(hCPU, CR_BIT_SO, hCPU->xer_so);
// remove reservation
hCPU->reservedMemAddr = 0;
hCPU->reservedMemValue = 0;
@@ -102,7 +103,7 @@ static void PPCInterpreter_STWCX(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STWUX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- int rA, rS, rB;
+ sint32 rA, rS, rB;
PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
ppcItpCtrl::ppcMem_writeDataU32(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], hCPU->gpr[rS]);
if (rA)
@@ -112,7 +113,7 @@ static void PPCInterpreter_STWUX(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STWBRX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- int rA, rS, rB;
+ sint32 rA, rS, rB;
PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
ppcItpCtrl::ppcMem_writeDataU32(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], _swapEndianU32(hCPU->gpr[rS]));
PPCInterpreter_nextInstruction(hCPU);
@@ -120,7 +121,7 @@ static void PPCInterpreter_STWBRX(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STMW(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- int rS, rA;
+ sint32 rS, rA;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm);
uint32 ea = (rA ? hCPU->gpr[rA] : 0) + imm;
@@ -135,7 +136,7 @@ static void PPCInterpreter_STMW(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STH(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- int rA, rS;
+ sint32 rA, rS;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm);
ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? hCPU->gpr[rA] : 0) + imm, (uint16)hCPU->gpr[rS]);
@@ -144,7 +145,7 @@ static void PPCInterpreter_STH(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STHU(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- int rA, rS;
+ sint32 rA, rS;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm);
ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? hCPU->gpr[rA] : 0) + imm, (uint16)hCPU->gpr[rS]);
@@ -155,7 +156,7 @@ static void PPCInterpreter_STHU(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STHX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- int rA, rS, rB;
+ sint32 rA, rS, rB;
PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], (uint16)hCPU->gpr[rS]);
PPCInterpreter_nextInstruction(hCPU);
@@ -163,7 +164,7 @@ static void PPCInterpreter_STHX(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STHUX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- int rA, rS, rB;
+ sint32 rA, rS, rB;
PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], (uint16)hCPU->gpr[rS]);
if (rA)
@@ -173,7 +174,7 @@ static void PPCInterpreter_STHUX(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STHBRX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- int rA, rS, rB;
+ sint32 rA, rS, rB;
PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], _swapEndianU16((uint16)hCPU->gpr[rS]));
PPCInterpreter_nextInstruction(hCPU);
@@ -181,7 +182,7 @@ static void PPCInterpreter_STHBRX(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STB(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- int rA, rS;
+ sint32 rA, rS;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm);
ppcItpCtrl::ppcMem_writeDataU8(hCPU, (rA ? hCPU->gpr[rA] : 0) + imm, (uint8)hCPU->gpr[rS]);
@@ -190,7 +191,7 @@ static void PPCInterpreter_STB(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STBU(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- int rA, rS;
+ sint32 rA, rS;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm);
ppcItpCtrl::ppcMem_writeDataU8(hCPU, hCPU->gpr[rA] + imm, (uint8)hCPU->gpr[rS]);
@@ -200,7 +201,7 @@ static void PPCInterpreter_STBU(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STBX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- int rA, rS, rB;
+ sint32 rA, rS, rB;
PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
ppcItpCtrl::ppcMem_writeDataU8(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], (uint8)hCPU->gpr[rS]);
PPCInterpreter_nextInstruction(hCPU);
@@ -208,7 +209,7 @@ static void PPCInterpreter_STBX(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STBUX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- int rA, rS, rB;
+ sint32 rA, rS, rB;
PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
ppcItpCtrl::ppcMem_writeDataU8(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], (uint8)hCPU->gpr[rS]);
if (rA)
@@ -218,7 +219,7 @@ static void PPCInterpreter_STBUX(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STSWI(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- int rA, rS, nb;
+ sint32 rA, rS, nb;
PPC_OPC_TEMPL_X(Opcode, rS, rA, nb);
if (nb == 0) nb = 32;
uint32 ea = rA ? hCPU->gpr[rA] : 0;
@@ -228,7 +229,39 @@ static void PPCInterpreter_STSWI(PPCInterpreter_t* hCPU, uint32 Opcode)
{
if (i == 0)
{
- r = hCPU->gpr[rS];
+ r = rS < 32 ? hCPU->gpr[rS] : 0; // what happens if rS is out of bounds?
+ rS++;
+ rS %= 32;
+ i = 4;
+ }
+ ppcItpCtrl::ppcMem_writeDataU8(hCPU, ea, (r >> 24));
+ r <<= 8;
+ ea++;
+ i--;
+ nb--;
+ }
+ PPCInterpreter_nextInstruction(hCPU);
+}
+
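+// STSWX (store string word indexed): the byte count comes from the low 7 bits of XER
+// (a count of 0 stores nothing) and the source register wraps from r31 back to r0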
+static void PPCInterpreter_STSWX(PPCInterpreter_t* hCPU, uint32 Opcode)
+{
+ sint32 rA, rS, rB;
+ PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
+ sint32 nb = hCPU->spr.XER&0x7F;
+ if (nb == 0)
+ {
+ PPCInterpreter_nextInstruction(hCPU);
+ return;
+ }
+ uint32 ea = rA ? hCPU->gpr[rA] : 0;
+ ea += hCPU->gpr[rB];
+ uint32 r = 0;
+ int i = 0;
+ while (nb > 0)
+ {
+ if (i == 0)
+ {
+ r = rS < 32 ? hCPU->gpr[rS] : 0; // what happens if rS is out of bounds?
rS++;
rS %= 32;
i = 4;
@@ -459,7 +492,6 @@ static void PPCInterpreter_LSWI(PPCInterpreter_t* hCPU, uint32 Opcode)
PPC_OPC_TEMPL_X(Opcode, rD, rA, nb);
if (nb == 0)
nb = 32;
-
uint32 ea = rA ? hCPU->gpr[rA] : 0;
uint32 r = 0;
int i = 4;
@@ -469,7 +501,8 @@ static void PPCInterpreter_LSWI(PPCInterpreter_t* hCPU, uint32 Opcode)
if (i == 0)
{
i = 4;
- hCPU->gpr[rD] = r;
+ if(rD < 32)
+ hCPU->gpr[rD] = r;
rD++;
rD %= 32;
r = 0;
@@ -486,7 +519,52 @@ static void PPCInterpreter_LSWI(PPCInterpreter_t* hCPU, uint32 Opcode)
r <<= 8;
i--;
}
- hCPU->gpr[rD] = r;
+ if(rD < 32)
+ hCPU->gpr[rD] = r;
+ PPCInterpreter_nextInstruction(hCPU);
+}
+
+static void PPCInterpreter_LSWX(PPCInterpreter_t* hCPU, uint32 Opcode)
+{
+ sint32 rA, rD, rB;
+ PPC_OPC_TEMPL_X(Opcode, rD, rA, rB);
+ // byte count comes from XER
+ uint32 nb = (hCPU->spr.XER>>0)&0x7F;
+ if (nb == 0)
+ {
+ PPCInterpreter_nextInstruction(hCPU);
+ return; // no-op
+ }
+ uint32 ea = rA ? hCPU->gpr[rA] : 0;
+ ea += hCPU->gpr[rB];
+ uint32 r = 0;
+ int i = 4;
+ uint8 v;
+ while (nb>0)
+ {
+ if (i == 0)
+ {
+ i = 4;
+ if(rD < 32)
+ hCPU->gpr[rD] = r;
+ rD++;
+ rD %= 32;
+ r = 0;
+ }
+ v = ppcItpCtrl::ppcMem_readDataU8(hCPU, ea);
+ r <<= 8;
+ r |= v;
+ ea++;
+ i--;
+ nb--;
+ }
+ while (i)
+ {
+ r <<= 8;
+ i--;
+ }
+ if(rD < 32)
+ hCPU->gpr[rD] = r;
PPCInterpreter_nextInstruction(hCPU);
}
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp
index ace1601f..4449f135 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp
@@ -63,16 +63,25 @@ void PPCInterpreter_setDEC(PPCInterpreter_t* hCPU, uint32 newValue)
uint32 PPCInterpreter_getXER(PPCInterpreter_t* hCPU)
{
uint32 xerValue = hCPU->spr.XER;
- xerValue &= ~(1<<XER_BIT_CA);
- if( hCPU->xer_ca )
- xerValue |= (1<<XER_BIT_CA);
+ if (hCPU->xer_ca)
+ xerValue |= (1 << XER_BIT_CA);
+ if (hCPU->xer_so)
+ xerValue |= (1 << XER_BIT_SO);
+ if (hCPU->xer_ov)
+ xerValue |= (1 << XER_BIT_OV);
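+ // CA/SO/OV are tracked in separate uint8 fields (xer_ca/xer_so/xer_ov) for fast access;
+ // they are merged back into the architectural XER value here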
return xerValue;
}
void PPCInterpreter_setXER(PPCInterpreter_t* hCPU, uint32 v)
{
- hCPU->spr.XER = v;
- hCPU->xer_ca = (v>>XER_BIT_CA)&1;
+ const uint32 XER_MASK = 0xE0FFFFFF; // some bits are masked out. Figure out which ones exactly
+ hCPU->spr.XER = v & XER_MASK;
+ hCPU->xer_ca = (v >> XER_BIT_CA) & 1;
+ hCPU->xer_so = (v >> XER_BIT_SO) & 1;
+ hCPU->xer_ov = (v >> XER_BIT_OV) & 1;
}
uint32 PPCInterpreter_getCoreIndex(PPCInterpreter_t* hCPU)
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp
index 12f86427..7809a01d 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp
@@ -5,7 +5,6 @@
#include "Cafe/OS/libs/coreinit/coreinit_CodeGen.h"
#include "../Recompiler/PPCRecompiler.h"
-#include "../Recompiler/PPCRecompilerX64.h"
#include
#include "Cafe/HW/Latte/Core/LatteBufferCache.h"
@@ -94,7 +93,6 @@ void PPCInterpreter_MTCRF(PPCInterpreter_t* hCPU, uint32 Opcode)
{
// frequently used by GCC compiled code (e.g. SM64 port)
// tested
-
uint32 rS;
uint32 crfMask;
PPC_OPC_TEMPL_XFX(Opcode, rS, crfMask);
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.hpp
index 718162be..9bfcd53d 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.hpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.hpp
@@ -68,6 +68,8 @@ static void PPCInterpreter_TW(PPCInterpreter_t* hCPU, uint32 opcode)
PPC_OPC_TEMPL_X(opcode, to, rA, rB);
cemu_assert_debug(to == 0);
+ if(to != 0)
+ PPCInterpreter_nextInstruction(hCPU);
if (rA == DEBUGGER_BP_T_DEBUGGER)
debugger_enterTW(hCPU);
diff --git a/src/Cafe/HW/Espresso/PPCState.h b/src/Cafe/HW/Espresso/PPCState.h
index c315ed0e..fd943d39 100644
--- a/src/Cafe/HW/Espresso/PPCState.h
+++ b/src/Cafe/HW/Espresso/PPCState.h
@@ -49,12 +49,12 @@ struct PPCInterpreter_t
uint32 fpscr;
uint8 cr[32]; // 0 -> bit not set, 1 -> bit set (upper 7 bits of each byte must always be zero) (cr0 starts at index 0, cr1 at index 4 ..)
uint8 xer_ca; // carry from xer
- uint8 LSQE;
- uint8 PSE;
+ uint8 xer_so;
+ uint8 xer_ov;
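+ // like xer_ca above, SO and OV are mirrored out of spr.XER so flag updates avoid a read-modify-write of the SPR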
// thread remaining cycles
sint32 remainingCycles; // if this value goes below zero, the next thread is scheduled
sint32 skippedCycles; // number of skipped cycles
- struct
+ struct
{
uint32 LR;
uint32 CTR;
@@ -67,9 +67,10 @@ struct PPCInterpreter_t
uint32 reservedMemValue;
// temporary storage for recompiler
FPR_t temporaryFPR[8];
- uint32 temporaryGPR[4];
+ uint32 temporaryGPR[4]; // deprecated, refactor backend dependency on this away
+ uint32 temporaryGPR_reg[4];
// values below this are not used by Cafe OS usermode
- struct
+ struct
{
uint32 fpecr; // is this the same register as fpscr ?
uint32 DEC;
@@ -84,7 +85,7 @@ struct PPCInterpreter_t
// DMA
uint32 dmaU;
uint32 dmaL;
- // MMU
+ // MMU
uint32 dbatU[8];
uint32 dbatL[8];
uint32 ibatU[8];
@@ -92,6 +93,8 @@ struct PPCInterpreter_t
uint32 sr[16];
uint32 sdr1;
}sprExtended;
+ uint8 LSQE;
+ uint8 PSE;
// global CPU values
PPCInterpreterGlobal_t* global;
// interpreter control
@@ -227,9 +230,9 @@ static inline float flushDenormalToZero(float f)
// HLE interface
-typedef void(*HLECALL)(PPCInterpreter_t* hCPU);
+using HLECALL = void(*)(PPCInterpreter_t*);
+using HLEIDX = sint32;
-typedef sint32 HLEIDX;
HLEIDX PPCInterpreter_registerHLECall(HLECALL hleCall, std::string hleName);
HLECALL PPCInterpreter_getHLECall(HLEIDX funcIndex);
diff --git a/src/Cafe/HW/Espresso/PPCTimer.cpp b/src/Cafe/HW/Espresso/PPCTimer.cpp
index c27c94ee..257973a6 100644
--- a/src/Cafe/HW/Espresso/PPCTimer.cpp
+++ b/src/Cafe/HW/Espresso/PPCTimer.cpp
@@ -1,5 +1,4 @@
#include "Cafe/HW/Espresso/Const.h"
-#include "asm/x64util.h"
#include "config/ActiveSettings.h"
#include "util/helpers/fspinlock.h"
#include "util/highresolutiontimer/HighResolutionTimer.h"
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp
new file mode 100644
index 00000000..728460a4
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp
@@ -0,0 +1,1695 @@
+#include "BackendAArch64.h"
+
+#pragma push_macro("CSIZE")
+#undef CSIZE
+#include
+#pragma pop_macro("CSIZE")
+#include
+
+#include
+
+#include "../PPCRecompiler.h"
+#include "Common/precompiled.h"
+#include "Common/cpu_features.h"
+#include "HW/Espresso/Interpreter/PPCInterpreterInternal.h"
+#include "HW/Espresso/Interpreter/PPCInterpreterHelper.h"
+#include "HW/Espresso/PPCState.h"
+
+using namespace Xbyak_aarch64;
+
+constexpr uint32 TEMP_GPR_1_ID = 25;
+constexpr uint32 TEMP_GPR_2_ID = 26;
+constexpr uint32 PPC_RECOMPILER_INSTANCE_DATA_REG_ID = 27;
+constexpr uint32 MEMORY_BASE_REG_ID = 28;
+constexpr uint32 HCPU_REG_ID = 29;
+
+constexpr uint32 TEMP_FPR_ID = 31;
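+
+// register convention implied by the constants above: w/x25 and w/x26 are scratch registers,
+// x27 holds the PPCRecompilerInstanceData_t pointer, x28 the guest memory base and
+// x29 the PPCInterpreter_t (hCPU) pointer; v31 is the scratch FPR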
+
+struct FPReg
+{
+ explicit FPReg(size_t index)
+ : index(index), VReg(index), QReg(index), DReg(index), SReg(index), HReg(index), BReg(index)
+ {
+ }
+ const size_t index;
+ const VReg VReg;
+ const QReg QReg;
+ const DReg DReg;
+ const SReg SReg;
+ const HReg HReg;
+ const BReg BReg;
+};
+
+struct GPReg
+{
+ explicit GPReg(size_t index)
+ : index(index), XReg(index), WReg(index)
+ {
+ }
+ const size_t index;
+ const XReg XReg;
+ const WReg WReg;
+};
+
+static const XReg HCPU_REG{HCPU_REG_ID}, PPC_REC_INSTANCE_REG{PPC_RECOMPILER_INSTANCE_DATA_REG_ID}, MEM_BASE_REG{MEMORY_BASE_REG_ID};
+static const GPReg TEMP_GPR1{TEMP_GPR_1_ID};
+static const GPReg TEMP_GPR2{TEMP_GPR_2_ID};
+static const GPReg LR{TEMP_GPR_2_ID};
+
+static const FPReg TEMP_FPR{TEMP_FPR_ID};
+
+static const util::Cpu s_cpu;
+
+class AArch64Allocator : public Allocator
+{
+ private:
+#ifdef XBYAK_USE_MMAP_ALLOCATOR
+ inline static MmapAllocator s_allocator;
+#else
+ inline static Allocator s_allocator;
+#endif
+ Allocator* m_allocatorImpl;
+ bool m_freeDisabled = false;
+
+ public:
+ AArch64Allocator()
+ : m_allocatorImpl(reinterpret_cast<Allocator*>(&s_allocator)) {}
+
+ uint32* alloc(size_t size) override
+ {
+ return m_allocatorImpl->alloc(size);
+ }
+
+ void setFreeDisabled(bool disabled)
+ {
+ m_freeDisabled = disabled;
+ }
+
+ void free(uint32* p) override
+ {
+ if (!m_freeDisabled)
+ m_allocatorImpl->free(p);
+ }
+
+ [[nodiscard]] bool useProtect() const override
+ {
+ return !m_freeDisabled && m_allocatorImpl->useProtect();
+ }
+};
+
+struct UnconditionalJumpInfo
+{
+ IMLSegment* target;
+};
+
+struct ConditionalRegJumpInfo
+{
+ IMLSegment* target;
+ WReg regBool;
+ bool mustBeTrue;
+};
+
+struct NegativeRegValueJumpInfo
+{
+ IMLSegment* target;
+ WReg regValue;
+};
+
+using JumpInfo = std::variant<
+ UnconditionalJumpInfo,
+ ConditionalRegJumpInfo,
+ NegativeRegValueJumpInfo>;
+
+struct AArch64GenContext_t : CodeGenerator
+{
+ explicit AArch64GenContext_t(Allocator* allocator = nullptr);
+ void enterRecompilerCode();
+ void leaveRecompilerCode();
+
+ void r_name(IMLInstruction* imlInstruction);
+ void name_r(IMLInstruction* imlInstruction);
+ bool r_s32(IMLInstruction* imlInstruction);
+ bool r_r(IMLInstruction* imlInstruction);
+ bool r_r_s32(IMLInstruction* imlInstruction);
+ bool r_r_s32_carry(IMLInstruction* imlInstruction);
+ bool r_r_r(IMLInstruction* imlInstruction);
+ bool r_r_r_carry(IMLInstruction* imlInstruction);
+ void compare(IMLInstruction* imlInstruction);
+ void compare_s32(IMLInstruction* imlInstruction);
+ bool load(IMLInstruction* imlInstruction, bool indexed);
+ bool store(IMLInstruction* imlInstruction, bool indexed);
+ void atomic_cmp_store(IMLInstruction* imlInstruction);
+ bool macro(IMLInstruction* imlInstruction);
+ void call_imm(IMLInstruction* imlInstruction);
+ bool fpr_load(IMLInstruction* imlInstruction, bool indexed);
+ bool fpr_store(IMLInstruction* imlInstruction, bool indexed);
+ void fpr_r_r(IMLInstruction* imlInstruction);
+ void fpr_r_r_r(IMLInstruction* imlInstruction);
+ void fpr_r_r_r_r(IMLInstruction* imlInstruction);
+ void fpr_r(IMLInstruction* imlInstruction);
+ void fpr_compare(IMLInstruction* imlInstruction);
+ void cjump(IMLInstruction* imlInstruction, IMLSegment* imlSegment);
+ void jump(IMLSegment* imlSegment);
+ void conditionalJumpCycleCheck(IMLSegment* imlSegment);
+
+ static constexpr size_t MAX_JUMP_INSTR_COUNT = 2;
+ std::list<std::pair<size_t, JumpInfo>> jumps;
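+ // each pending branch is recorded as (code offset, jump info); prepareJump() reserves
+ // MAX_JUMP_INSTR_COUNT nops at the branch site and processAllJumps() later rewrites them
+ // with the shortest branch encoding that reaches the target segment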
+ void prepareJump(JumpInfo&& jumpInfo)
+ {
+ jumps.emplace_back(getSize(), jumpInfo);
+ for (int i = 0; i < MAX_JUMP_INSTR_COUNT; ++i)
+ nop();
+ }
+
+ std::map<IMLSegment*, size_t> segmentStarts;
+ void storeSegmentStart(IMLSegment* imlSegment)
+ {
+ segmentStarts[imlSegment] = getSize();
+ }
+
+ bool processAllJumps()
+ {
+ for (auto jump : jumps)
+ {
+ auto jumpStart = jump.first;
+ auto jumpInfo = jump.second;
+ bool success = std::visit(
+ [&, this](const auto& jump) {
+ setSize(jumpStart);
+ sint64 targetAddress = segmentStarts.at(jump.target);
+ sint64 addressOffset = targetAddress - jumpStart;
+ return handleJump(addressOffset, jump);
+ },
+ jumpInfo);
+ if (!success)
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ bool handleJump(sint64 addressOffset, const UnconditionalJumpInfo& jump)
+ {
+ // in +/-128MB
+ if (-0x8000000 <= addressOffset && addressOffset <= 0x7ffffff)
+ {
+ b(addressOffset);
+ return true;
+ }
+
+ cemu_assert_suspicious();
+
+ return false;
+ }
+
+ bool handleJump(sint64 addressOffset, const ConditionalRegJumpInfo& jump)
+ {
+ bool mustBeTrue = jump.mustBeTrue;
+
+ // in +/-32KB
+ if (-0x8000 <= addressOffset && addressOffset <= 0x7fff)
+ {
+ if (mustBeTrue)
+ tbnz(jump.regBool, 0, addressOffset);
+ else
+ tbz(jump.regBool, 0, addressOffset);
+ return true;
+ }
+
+ // in +/-1MB
+ if (-0x100000 <= addressOffset && addressOffset <= 0xfffff)
+ {
+ if (mustBeTrue)
+ cbnz(jump.regBool, addressOffset);
+ else
+ cbz(jump.regBool, addressOffset);
+ return true;
+ }
+
+ Label skipJump;
+ if (mustBeTrue)
+ tbz(jump.regBool, 0, skipJump);
+ else
+ tbnz(jump.regBool, 0, skipJump);
+ addressOffset -= 4;
+
+ // in +/-128MB
+ if (-0x8000000 <= addressOffset && addressOffset <= 0x7ffffff)
+ {
+ b(addressOffset);
+ L(skipJump);
+ return true;
+ }
+
+ cemu_assert_suspicious();
+
+ return false;
+ }
+
+ bool handleJump(sint64 addressOffset, const NegativeRegValueJumpInfo& jump)
+ {
+ // in +/-32KB
+ if (-0x8000 <= addressOffset && addressOffset <= 0x7fff)
+ {
+ tbnz(jump.regValue, 31, addressOffset);
+ return true;
+ }
+
+ // in +/-1MB
+ if (-0x100000 <= addressOffset && addressOffset <= 0xfffff)
+ {
+ tst(jump.regValue, 0x80000000);
+ addressOffset -= 4;
+ bne(addressOffset);
+ return true;
+ }
+
+ Label skipJump;
+ tbz(jump.regValue, 31, skipJump);
+ addressOffset -= 4;
+
+ // in +/-128MB
+ if (-0x8000000 <= addressOffset && addressOffset <= 0x7ffffff)
+ {
+ b(addressOffset);
+ L(skipJump);
+ return true;
+ }
+
+ cemu_assert_suspicious();
+
+ return false;
+ }
+};
+
+template T>
+T fpReg(const IMLReg& imlReg)
+{
+ cemu_assert_debug(imlReg.GetRegFormat() == IMLRegFormat::F64);
+ auto regId = imlReg.GetRegID();
+ cemu_assert_debug(regId >= IMLArchAArch64::PHYSREG_FPR_BASE && regId < IMLArchAArch64::PHYSREG_FPR_BASE + IMLArchAArch64::PHYSREG_FPR_COUNT);
+ return T(regId - IMLArchAArch64::PHYSREG_FPR_BASE);
+}
+
+template T>
+T gpReg(const IMLReg& imlReg)
+{
+ auto regFormat = imlReg.GetRegFormat();
+ if (std::is_same_v<T, WReg>)
+ cemu_assert_debug(regFormat == IMLRegFormat::I32);
+ else if (std::is_same_v<T, XReg>)
+ cemu_assert_debug(regFormat == IMLRegFormat::I64);
+ else
+ cemu_assert_unimplemented();
+
+ auto regId = imlReg.GetRegID();
+ cemu_assert_debug(regId >= IMLArchAArch64::PHYSREG_GPR_BASE && regId < IMLArchAArch64::PHYSREG_GPR_BASE + IMLArchAArch64::PHYSREG_GPR_COUNT);
+ return T(regId - IMLArchAArch64::PHYSREG_GPR_BASE);
+}
+
+template To, std::derived_from From>
+To aliasAs(const From& reg)
+{
+ return To(reg.getIdx());
+}
+
+template To, std::derived_from From>
+To aliasAs(const From& reg)
+{
+ return To(reg.getIdx());
+}
+
+AArch64GenContext_t::AArch64GenContext_t(Allocator* allocator)
+ : CodeGenerator(DEFAULT_MAX_CODE_SIZE, AutoGrow, allocator)
+{
+}
+
+constexpr uint64 ones(uint32 size)
+{
+ return (size == 64) ? 0xffffffffffffffff : ((uint64)1 << size) - 1;
+}
+
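+// AArch64 LDR/STR with an unsigned immediate scales the 12-bit offset by the access size,
+// so an offset is only encodable if it is a multiple of the access width and at most
+// 4095 * width. The helpers below verify this for the offsets used in r_name/name_r.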
+constexpr bool isAdrImmValidFPR(sint32 imm, uint32 bits)
+{
+ uint32 times = bits / 8;
+ uint32 sh = std::countr_zero(times);
+ return (0 <= imm && imm <= 4095 * times) && ((uint64)imm & ones(sh)) == 0;
+}
+
+constexpr bool isAdrImmValidGPR(sint32 imm, uint32 bits = 32)
+{
+ uint32 size = std::countr_zero(bits / 8u);
+ sint32 times = 1 << size;
+ return (0 <= imm && imm <= 4095 * times) && ((uint64)imm & ones(size)) == 0;
+}
+
+constexpr bool isAdrImmRangeValid(sint32 rangeStart, sint32 rangeOffset, sint32 bits, std::invocable<sint32, sint32> auto check)
+{
+ for (sint32 i = rangeStart; i <= rangeStart + rangeOffset; i += bits / 8)
+ if (!check(i, bits))
+ return false;
+ return true;
+}
+
+constexpr bool isAdrImmRangeValidGPR(sint32 rangeStart, sint32 rangeOffset, sint32 bits = 32)
+{
+ return isAdrImmRangeValid(rangeStart, rangeOffset, bits, isAdrImmValidGPR);
+}
+
+constexpr bool isAdrImmRangeValidFpr(sint32 rangeStart, sint32 rangeOffset, sint32 bits)
+{
+ return isAdrImmRangeValid(rangeStart, rangeOffset, bits, isAdrImmValidFPR);
+}
+
+// Verify that all of the offsets for the PPCInterpreter_t members that we use in r_name/name_r have a valid imm value for AdrUimm
+static_assert(isAdrImmRangeValidGPR(offsetof(PPCInterpreter_t, gpr), sizeof(uint32) * 31));
+static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, spr.LR)));
+static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, spr.CTR)));
+static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, spr.XER)));
+static_assert(isAdrImmRangeValidGPR(offsetof(PPCInterpreter_t, spr.UGQR), sizeof(PPCInterpreter_t::spr.UGQR[0]) * (SPR_UGQR7 - SPR_UGQR0)));
+static_assert(isAdrImmRangeValidGPR(offsetof(PPCInterpreter_t, temporaryGPR_reg), sizeof(uint32) * 3));
+static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, xer_ca), 8));
+static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, xer_so), 8));
+static_assert(isAdrImmRangeValidGPR(offsetof(PPCInterpreter_t, cr), PPCREC_NAME_CR_LAST - PPCREC_NAME_CR, 8));
+static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, reservedMemAddr)));
+static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, reservedMemValue)));
+static_assert(isAdrImmRangeValidFpr(offsetof(PPCInterpreter_t, fpr), sizeof(FPR_t) * 63, 64));
+static_assert(isAdrImmRangeValidFpr(offsetof(PPCInterpreter_t, temporaryFPR), sizeof(FPR_t) * 7, 128));
+
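+// r_name loads the guest register/SPR/flag identified by "name" from the PPCInterpreter_t
+// context into the assigned host register; name_r below is the matching write-back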
+void AArch64GenContext_t::r_name(IMLInstruction* imlInstruction)
+{
+ uint32 name = imlInstruction->op_r_name.name;
+
+ if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::I64)
+ {
+ XReg regRXReg = gpReg(imlInstruction->op_r_name.regR);
+ WReg regR = aliasAs(regRXReg);
+ if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32)
+ {
+ ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0)));
+ }
+ else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999)
+ {
+ uint32 sprIndex = (name - PPCREC_NAME_SPR0);
+ if (sprIndex == SPR_LR)
+ ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.LR)));
+ else if (sprIndex == SPR_CTR)
+ ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.CTR)));
+ else if (sprIndex == SPR_XER)
+ ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.XER)));
+ else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
+ ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0)));
+ else
+ cemu_assert_suspicious();
+ }
+ else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
+ {
+ ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)));
+ }
+ else if (name == PPCREC_NAME_XER_CA)
+ {
+ ldrb(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, xer_ca)));
+ }
+ else if (name == PPCREC_NAME_XER_SO)
+ {
+ ldrb(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, xer_so)));
+ }
+ else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
+ {
+ ldrb(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR)));
+ }
+ else if (name == PPCREC_NAME_CPU_MEMRES_EA)
+ {
+ ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, reservedMemAddr)));
+ }
+ else if (name == PPCREC_NAME_CPU_MEMRES_VAL)
+ {
+ ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, reservedMemValue)));
+ }
+ else
+ {
+ cemu_assert_suspicious();
+ }
+ }
+ else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64)
+ {
+ auto imlRegR = imlInstruction->op_r_name.regR;
+
+ if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64))
+ {
+ uint32 regIndex = (name - PPCREC_NAME_FPR_HALF) / 2;
+ uint32 pairIndex = (name - PPCREC_NAME_FPR_HALF) % 2;
+ uint32 offset = offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + (pairIndex ? sizeof(double) : 0);
+ ldr(fpReg(imlRegR), AdrUimm(HCPU_REG, offset));
+ }
+ else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
+ {
+ ldr(fpReg(imlRegR), AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0)));
+ }
+ else
+ {
+ cemu_assert_suspicious();
+ }
+ }
+ else
+ {
+ cemu_assert_suspicious();
+ }
+}
+
+void AArch64GenContext_t::name_r(IMLInstruction* imlInstruction)
+{
+ uint32 name = imlInstruction->op_r_name.name;
+
+ if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::I64)
+ {
+ XReg regRXReg = gpReg(imlInstruction->op_r_name.regR);
+ WReg regR = aliasAs(regRXReg);
+ if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32)
+ {
+ str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0)));
+ }
+ else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999)
+ {
+ uint32 sprIndex = (name - PPCREC_NAME_SPR0);
+ if (sprIndex == SPR_LR)
+ str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.LR)));
+ else if (sprIndex == SPR_CTR)
+ str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.CTR)));
+ else if (sprIndex == SPR_XER)
+ str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.XER)));
+ else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
+ str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0)));
+ else
+ cemu_assert_suspicious();
+ }
+ else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
+ {
+ str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)));
+ }
+ else if (name == PPCREC_NAME_XER_CA)
+ {
+ strb(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, xer_ca)));
+ }
+ else if (name == PPCREC_NAME_XER_SO)
+ {
+ strb(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, xer_so)));
+ }
+ else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
+ {
+ strb(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR)));
+ }
+ else if (name == PPCREC_NAME_CPU_MEMRES_EA)
+ {
+ str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, reservedMemAddr)));
+ }
+ else if (name == PPCREC_NAME_CPU_MEMRES_VAL)
+ {
+ str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, reservedMemValue)));
+ }
+ else
+ {
+ cemu_assert_suspicious();
+ }
+ }
+ else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64)
+ {
+ auto imlRegR = imlInstruction->op_r_name.regR;
+ if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64))
+ {
+ uint32 regIndex = (name - PPCREC_NAME_FPR_HALF) / 2;
+ uint32 pairIndex = (name - PPCREC_NAME_FPR_HALF) % 2;
+ sint32 offset = offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + pairIndex * sizeof(double);
+ str(fpReg(imlRegR), AdrUimm(HCPU_REG, offset));
+ }
+ else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
+ {
+ str(fpReg(imlRegR), AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0)));
+ }
+ else
+ {
+ cemu_assert_suspicious();
+ }
+ }
+ else
+ {
+ cemu_assert_suspicious();
+ }
+}
+
+bool AArch64GenContext_t::r_r(IMLInstruction* imlInstruction)
+{
+ WReg regR = gpReg(imlInstruction->op_r_r.regR);
+ WReg regA = gpReg(imlInstruction->op_r_r.regA);
+
+ if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)
+ {
+ mov(regR, regA);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP)
+ {
+ rev(regR, regA);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32)
+ {
+ sxtb(regR, regA);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32)
+ {
+ sxth(regR, regA);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_NOT)
+ {
+ mvn(regR, regA);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_NEG)
+ {
+ neg(regR, regA);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_CNTLZW)
+ {
+ clz(regR, regA);
+ }
+ else
+ {
+ cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_r_r(): Unsupported operation {:x}", imlInstruction->operation);
+ return false;
+ }
+ return true;
+}
+
+bool AArch64GenContext_t::r_s32(IMLInstruction* imlInstruction)
+{
+ sint32 imm32 = imlInstruction->op_r_immS32.immS32;
+ WReg reg = gpReg(imlInstruction->op_r_immS32.regR);
+
+ if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)
+ {
+ mov(reg, imm32);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE)
+ {
+ ror(reg, reg, 32 - (imm32 & 0x1f));
+ }
+ else
+ {
+ cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_r_s32(): Unsupported operation {:x}", imlInstruction->operation);
+ return false;
+ }
+ return true;
+}
+
+bool AArch64GenContext_t::r_r_s32(IMLInstruction* imlInstruction)
+{
+ WReg regR = gpReg(imlInstruction->op_r_r_s32.regR);
+ WReg regA = gpReg(imlInstruction->op_r_r_s32.regA);
+ sint32 immS32 = imlInstruction->op_r_r_s32.immS32;
+
+ if (imlInstruction->operation == PPCREC_IML_OP_ADD)
+ {
+ add_imm(regR, regA, immS32, TEMP_GPR1.WReg);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_SUB)
+ {
+ sub_imm(regR, regA, immS32, TEMP_GPR1.WReg);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_AND)
+ {
+ mov(TEMP_GPR1.WReg, immS32);
+ and_(regR, regA, TEMP_GPR1.WReg);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_OR)
+ {
+ mov(TEMP_GPR1.WReg, immS32);
+ orr(regR, regA, TEMP_GPR1.WReg);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_XOR)
+ {
+ mov(TEMP_GPR1.WReg, immS32);
+ eor(regR, regA, TEMP_GPR1.WReg);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED)
+ {
+ mov(TEMP_GPR1.WReg, immS32);
+ mul(regR, regA, TEMP_GPR1.WReg);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
+ {
+ lsl(regR, regA, (uint32)immS32 & 0x1f);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
+ {
+ lsr(regR, regA, (uint32)immS32 & 0x1f);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
+ {
+ asr(regR, regA, (uint32)immS32 & 0x1f);
+ }
+ else
+ {
+ cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_r_r_s32(): Unsupported operation {:x}", imlInstruction->operation);
+ cemu_assert_suspicious();
+ return false;
+ }
+ return true;
+}
+
+bool AArch64GenContext_t::r_r_s32_carry(IMLInstruction* imlInstruction)
+{
+ WReg regR = gpReg(imlInstruction->op_r_r_s32_carry.regR);
+ WReg regA = gpReg(imlInstruction->op_r_r_s32_carry.regA);
+ WReg regCarry = gpReg(imlInstruction->op_r_r_s32_carry.regCarry);
+
+ sint32 immS32 = imlInstruction->op_r_r_s32_carry.immS32;
+ if (imlInstruction->operation == PPCREC_IML_OP_ADD)
+ {
+ adds_imm(regR, regA, immS32, TEMP_GPR1.WReg);
+ cset(regCarry, Cond::CS);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_ADD_WITH_CARRY)
+ {
+ mov(TEMP_GPR1.WReg, immS32);
+ cmp(regCarry, 1);
+ adcs(regR, regA, TEMP_GPR1.WReg);
+ cset(regCarry, Cond::CS);
+ }
+ else
+ {
+ cemu_assert_suspicious();
+ return false;
+ }
+
+ return true;
+}
+
+bool AArch64GenContext_t::r_r_r(IMLInstruction* imlInstruction)
+{
+ WReg regResult = gpReg(imlInstruction->op_r_r_r.regR);
+ XReg reg64Result = aliasAs(regResult);
+ WReg regOperand1 = gpReg(imlInstruction->op_r_r_r.regA);
+ WReg regOperand2 = gpReg(imlInstruction->op_r_r_r.regB);
+
+ if (imlInstruction->operation == PPCREC_IML_OP_ADD)
+ {
+ add(regResult, regOperand1, regOperand2);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_SUB)
+ {
+ sub(regResult, regOperand1, regOperand2);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_OR)
+ {
+ orr(regResult, regOperand1, regOperand2);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_AND)
+ {
+ and_(regResult, regOperand1, regOperand2);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_XOR)
+ {
+ eor(regResult, regOperand1, regOperand2);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED)
+ {
+ mul(regResult, regOperand1, regOperand2);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_SLW)
+ {
+ tst(regOperand2, 32);
+ lsl(regResult, regOperand1, regOperand2);
+ csel(regResult, regResult, wzr, Cond::EQ);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_SRW)
+ {
+ tst(regOperand2, 32);
+ lsr(regResult, regOperand1, regOperand2);
+ csel(regResult, regResult, wzr, Cond::EQ);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE)
+ {
+ neg(TEMP_GPR1.WReg, regOperand2);
+ ror(regResult, regOperand1, TEMP_GPR1.WReg);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
+ {
+ asr(regResult, regOperand1, regOperand2);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
+ {
+ lsr(regResult, regOperand1, regOperand2);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
+ {
+ lsl(regResult, regOperand1, regOperand2);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED)
+ {
+ sdiv(regResult, regOperand1, regOperand2);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED)
+ {
+ udiv(regResult, regOperand1, regOperand2);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED)
+ {
+ smull(reg64Result, regOperand1, regOperand2);
+ lsr(reg64Result, reg64Result, 32);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED)
+ {
+ umull(reg64Result, regOperand1, regOperand2);
+ lsr(reg64Result, reg64Result, 32);
+ }
+ else
+ {
+ cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_r_r_r(): Unsupported operation {:x}", imlInstruction->operation);
+ return false;
+ }
+ return true;
+}
+
+bool AArch64GenContext_t::r_r_r_carry(IMLInstruction* imlInstruction)
+{
+ WReg regR = gpReg(imlInstruction->op_r_r_r_carry.regR);
+ WReg regA = gpReg(imlInstruction->op_r_r_r_carry.regA);
+ WReg regB = gpReg(imlInstruction->op_r_r_r_carry.regB);
+ WReg regCarry = gpReg(imlInstruction->op_r_r_r_carry.regCarry);
+
+ if (imlInstruction->operation == PPCREC_IML_OP_ADD)
+ {
+ adds(regR, regA, regB);
+ cset(regCarry, Cond::CS);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_ADD_WITH_CARRY)
+ {
+ cmp(regCarry, 1);
+ adcs(regR, regA, regB);
+ cset(regCarry, Cond::CS);
+ }
+ else
+ {
+ cemu_assert_suspicious();
+ return false;
+ }
+
+ return true;
+}
+
+Cond ImlCondToArm64Cond(IMLCondition condition)
+{
+ switch (condition)
+ {
+ case IMLCondition::EQ:
+ return Cond::EQ;
+ case IMLCondition::NEQ:
+ return Cond::NE;
+ case IMLCondition::UNSIGNED_GT:
+ return Cond::HI;
+ case IMLCondition::UNSIGNED_LT:
+ return Cond::LO;
+ case IMLCondition::SIGNED_GT:
+ return Cond::GT;
+ case IMLCondition::SIGNED_LT:
+ return Cond::LT;
+ default:
+ {
+ cemu_assert_suspicious();
+ return Cond::EQ;
+ }
+ }
+}
+
+void AArch64GenContext_t::compare(IMLInstruction* imlInstruction)
+{
+ WReg regR = gpReg(imlInstruction->op_compare.regR);
+ WReg regA = gpReg(imlInstruction->op_compare.regA);
+ WReg regB = gpReg(imlInstruction->op_compare.regB);
+ Cond cond = ImlCondToArm64Cond(imlInstruction->op_compare.cond);
+ cmp(regA, regB);
+ cset(regR, cond);
+}
+
+void AArch64GenContext_t::compare_s32(IMLInstruction* imlInstruction)
+{
+ WReg regR = gpReg(imlInstruction->op_compare.regR);
+ WReg regA = gpReg(imlInstruction->op_compare.regA);
+ sint32 imm = imlInstruction->op_compare_s32.immS32;
+ auto cond = ImlCondToArm64Cond(imlInstruction->op_compare.cond);
+ cmp_imm(regA, imm, TEMP_GPR1.WReg);
+ cset(regR, cond);
+}
+
+void AArch64GenContext_t::cjump(IMLInstruction* imlInstruction, IMLSegment* imlSegment)
+{
+ auto regBool = gpReg(imlInstruction->op_conditional_jump.registerBool);
+ prepareJump(ConditionalRegJumpInfo{
+ .target = imlSegment->nextSegmentBranchTaken,
+ .regBool = regBool,
+ .mustBeTrue = imlInstruction->op_conditional_jump.mustBeTrue,
+ });
+}
+
+void AArch64GenContext_t::jump(IMLSegment* imlSegment)
+{
+ prepareJump(UnconditionalJumpInfo{.target = imlSegment->nextSegmentBranchTaken});
+}
+
+void AArch64GenContext_t::conditionalJumpCycleCheck(IMLSegment* imlSegment)
+{
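+ // branch to the target segment when remainingCycles has gone negative (bit 31 set),
+ // i.e. the thread's cycle budget is used up and the next thread should be scheduled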
+ ldr(TEMP_GPR1.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, remainingCycles)));
+ prepareJump(NegativeRegValueJumpInfo{
+ .target = imlSegment->nextSegmentBranchTaken,
+ .regValue = TEMP_GPR1.WReg,
+ });
+}
+
+void* PPCRecompiler_virtualHLE(PPCInterpreter_t* ppcInterpreter, uint32 hleFuncId)
+{
+ void* prevRSPTemp = ppcInterpreter->rspTemp;
+ if (hleFuncId == 0xFFD0)
+ {
+ ppcInterpreter->remainingCycles -= 500; // subtract roughly 500 cycles for each HLE call
+ ppcInterpreter->gpr[3] = 0;
+ PPCInterpreter_nextInstruction(ppcInterpreter);
+ return PPCInterpreter_getCurrentInstance();
+ }
+ else
+ {
+ auto hleCall = PPCInterpreter_getHLECall(hleFuncId);
+ cemu_assert(hleCall != nullptr);
+ hleCall(ppcInterpreter);
+ }
+ ppcInterpreter->rspTemp = prevRSPTemp;
+ return PPCInterpreter_getCurrentInstance();
+}
+
+bool AArch64GenContext_t::macro(IMLInstruction* imlInstruction)
+{
+ if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG)
+ {
+ WReg branchDstReg = gpReg(imlInstruction->op_macro.paramReg);
+
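+ // ppcRecompilerDirectJumpTable holds one 8-byte host pointer per 4-byte PPC address,
+ // so shifting the PPC address left by 1 yields the byte offset of its table entry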
+ mov(TEMP_GPR1.WReg, offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
+ add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, branchDstReg, ShMod::LSL, 1);
+ ldr(TEMP_GPR1.XReg, AdrExt(PPC_REC_INSTANCE_REG, TEMP_GPR1.WReg, ExtMod::UXTW));
+ mov(LR.WReg, branchDstReg);
+ br(TEMP_GPR1.XReg);
+ return true;
+ }
+ else if (imlInstruction->operation == PPCREC_IML_MACRO_BL)
+ {
+ uint32 newLR = imlInstruction->op_macro.param + 4;
+
+ mov(TEMP_GPR1.WReg, newLR);
+ str(TEMP_GPR1.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.LR)));
+
+ uint32 newIP = imlInstruction->op_macro.param2;
+ uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL;
+ mov(TEMP_GPR1.XReg, lookupOffset);
+ ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg));
+ mov(LR.WReg, newIP);
+ br(TEMP_GPR1.XReg);
+ return true;
+ }
+ else if (imlInstruction->operation == PPCREC_IML_MACRO_B_FAR)
+ {
+ uint32 newIP = imlInstruction->op_macro.param2;
+ uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL;
+ mov(TEMP_GPR1.XReg, lookupOffset);
+ ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg));
+ mov(LR.WReg, newIP);
+ br(TEMP_GPR1.XReg);
+ return true;
+ }
+ else if (imlInstruction->operation == PPCREC_IML_MACRO_LEAVE)
+ {
+ uint32 currentInstructionAddress = imlInstruction->op_macro.param;
+ mov(TEMP_GPR1.XReg, (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); // newIP = 0 special value for recompiler exit
+ ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg));
+ mov(LR.WReg, currentInstructionAddress);
+ br(TEMP_GPR1.XReg);
+ return true;
+ }
+ else if (imlInstruction->operation == PPCREC_IML_MACRO_DEBUGBREAK)
+ {
+ brk(0xf000);
+ return true;
+ }
+ else if (imlInstruction->operation == PPCREC_IML_MACRO_COUNT_CYCLES)
+ {
+ uint32 cycleCount = imlInstruction->op_macro.param;
+ AdrUimm adrCycles = AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, remainingCycles));
+ ldr(TEMP_GPR1.WReg, adrCycles);
+ sub_imm(TEMP_GPR1.WReg, TEMP_GPR1.WReg, cycleCount, TEMP_GPR2.WReg);
+ str(TEMP_GPR1.WReg, adrCycles);
+ return true;
+ }
+ else if (imlInstruction->operation == PPCREC_IML_MACRO_HLE)
+ {
+ uint32 ppcAddress = imlInstruction->op_macro.param;
+ uint32 funcId = imlInstruction->op_macro.param2;
+ Label cyclesLeftLabel;
+
+ // update instruction pointer
+ mov(TEMP_GPR1.WReg, ppcAddress);
+ str(TEMP_GPR1.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, instructionPointer)));
+ // set parameters
+ str(x30, AdrPreImm(sp, -16));
+
+ mov(x0, HCPU_REG);
+ mov(w1, funcId);
+ // call HLE function
+
+ mov(TEMP_GPR1.XReg, (uint64)PPCRecompiler_virtualHLE);
+ blr(TEMP_GPR1.XReg);
+
+ mov(HCPU_REG, x0);
+
+ ldr(x30, AdrPostImm(sp, 16));
+
+ // check if cycles were decreased below zero, if so -> leave recompiler
+ ldr(TEMP_GPR1.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, remainingCycles)));
+ tbz(TEMP_GPR1.WReg, 31, cyclesLeftLabel); // check if negative
+
+ mov(TEMP_GPR1.XReg, offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
+ ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg));
+ ldr(LR.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, instructionPointer)));
+ // branch to recompiler exit
+ br(TEMP_GPR1.XReg);
+
+ L(cyclesLeftLabel);
+ // check if instruction pointer was changed
+ // assign new instruction pointer to LR.WReg
+ ldr(LR.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, instructionPointer)));
+ mov(TEMP_GPR1.XReg, offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
+ add(TEMP_GPR1.XReg, TEMP_GPR1.XReg, LR.XReg, ShMod::LSL, 1);
+ ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg));
+ // branch to [ppcRecompilerDirectJumpTable + PPCInterpreter_t::instructionPointer * 2]
+ br(TEMP_GPR1.XReg);
+ return true;
+ }
+ else
+ {
+ cemuLog_log(LogType::Recompiler, "Unknown recompiler macro operation %d\n", imlInstruction->operation);
+ cemu_assert_suspicious();
+ }
+ return false;
+}
+
+bool AArch64GenContext_t::load(IMLInstruction* imlInstruction, bool indexed)
+{
+ cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32);
+ cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32);
+ if (indexed)
+ cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32);
+
+ sint32 memOffset = imlInstruction->op_storeLoad.immS32;
+ bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend;
+ bool switchEndian = imlInstruction->op_storeLoad.flags2.swapEndian;
+ WReg memReg = gpReg(imlInstruction->op_storeLoad.registerMem);
+ WReg dataReg = gpReg(imlInstruction->op_storeLoad.registerData);
+
+ add_imm(TEMP_GPR1.WReg, memReg, memOffset, TEMP_GPR1.WReg);
+ if (indexed)
+ add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, gpReg(imlInstruction->op_storeLoad.registerMem2));
+
+ auto adr = AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW);
+ if (imlInstruction->op_storeLoad.copyWidth == 32)
+ {
+ ldr(dataReg, adr);
+ if (switchEndian)
+ rev(dataReg, dataReg);
+ }
+ else if (imlInstruction->op_storeLoad.copyWidth == 16)
+ {
+ if (switchEndian)
+ {
+ ldrh(dataReg, adr);
+ rev(dataReg, dataReg);
+ if (signExtend)
+ asr(dataReg, dataReg, 16);
+ else
+ lsr(dataReg, dataReg, 16);
+ }
+ else
+ {
+ if (signExtend)
+ ldrsh(dataReg, adr);
+ else
+ ldrh(dataReg, adr);
+ }
+ }
+ else if (imlInstruction->op_storeLoad.copyWidth == 8)
+ {
+ if (signExtend)
+ ldrsb(dataReg, adr);
+ else
+ ldrb(dataReg, adr);
+ }
+ else
+ {
+ return false;
+ }
+ return true;
+}
+
+bool AArch64GenContext_t::store(IMLInstruction* imlInstruction, bool indexed)
+{
+ cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32);
+ cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32);
+ if (indexed)
+ cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32);
+
+ WReg dataReg = gpReg(imlInstruction->op_storeLoad.registerData);
+ WReg memReg = gpReg(imlInstruction->op_storeLoad.registerMem);
+ sint32 memOffset = imlInstruction->op_storeLoad.immS32;
+ bool swapEndian = imlInstruction->op_storeLoad.flags2.swapEndian;
+
+ add_imm(TEMP_GPR1.WReg, memReg, memOffset, TEMP_GPR1.WReg);
+ if (indexed)
+ add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, gpReg(imlInstruction->op_storeLoad.registerMem2));
+ AdrExt adr = AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW);
+ if (imlInstruction->op_storeLoad.copyWidth == 32)
+ {
+ if (swapEndian)
+ {
+ rev(TEMP_GPR2.WReg, dataReg);
+ str(TEMP_GPR2.WReg, adr);
+ }
+ else
+ {
+ str(dataReg, adr);
+ }
+ }
+ else if (imlInstruction->op_storeLoad.copyWidth == 16)
+ {
+ if (swapEndian)
+ {
+ rev(TEMP_GPR2.WReg, dataReg);
+ lsr(TEMP_GPR2.WReg, TEMP_GPR2.WReg, 16);
+ strh(TEMP_GPR2.WReg, adr);
+ }
+ else
+ {
+ strh(dataReg, adr);
+ }
+ }
+ else if (imlInstruction->op_storeLoad.copyWidth == 8)
+ {
+ strb(dataReg, adr);
+ }
+ else
+ {
+ return false;
+ }
+ return true;
+}
+
+void AArch64GenContext_t::atomic_cmp_store(IMLInstruction* imlInstruction)
+{
+ WReg outReg = gpReg(imlInstruction->op_atomic_compare_store.regBoolOut);
+ WReg eaReg = gpReg(imlInstruction->op_atomic_compare_store.regEA);
+ WReg valReg = gpReg(imlInstruction->op_atomic_compare_store.regWriteValue);
+ WReg cmpValReg = gpReg(imlInstruction->op_atomic_compare_store.regCompareValue);
+
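+ // use a single compare-and-swap (CASAL) when the CPU reports LSE atomics support,
+ // otherwise fall back to an exclusive load/store (LDAXR/STLXR) retry loop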
+ if (s_cpu.isAtomicSupported())
+ {
+ mov(TEMP_GPR2.WReg, cmpValReg);
+ add(TEMP_GPR1.XReg, MEM_BASE_REG, eaReg, ExtMod::UXTW);
+ casal(TEMP_GPR2.WReg, valReg, AdrNoOfs(TEMP_GPR1.XReg));
+ cmp(TEMP_GPR2.WReg, cmpValReg);
+ cset(outReg, Cond::EQ);
+ }
+ else
+ {
+ Label notEqual;
+ Label storeFailed;
+
+ add(TEMP_GPR1.XReg, MEM_BASE_REG, eaReg, ExtMod::UXTW);
+ L(storeFailed);
+ ldaxr(TEMP_GPR2.WReg, AdrNoOfs(TEMP_GPR1.XReg));
+ cmp(TEMP_GPR2.WReg, cmpValReg);
+ bne(notEqual);
+ stlxr(TEMP_GPR2.WReg, valReg, AdrNoOfs(TEMP_GPR1.XReg));
+ cbnz(TEMP_GPR2.WReg, storeFailed);
+
+ L(notEqual);
+ cset(outReg, Cond::EQ);
+ }
+}
+
+bool AArch64GenContext_t::fpr_load(IMLInstruction* imlInstruction, bool indexed)
+{
+ const IMLReg& dataReg = imlInstruction->op_storeLoad.registerData;
+ SReg dataSReg = fpReg(dataReg);
+ DReg dataDReg = fpReg(dataReg);
+ WReg realRegisterMem = gpReg(imlInstruction->op_storeLoad.registerMem);
+ WReg indexReg = indexed ? gpReg(imlInstruction->op_storeLoad.registerMem2) : wzr;
+ sint32 adrOffset = imlInstruction->op_storeLoad.immS32;
+ uint8 mode = imlInstruction->op_storeLoad.mode;
+
+ if (mode == PPCREC_FPR_LD_MODE_SINGLE)
+ {
+ add_imm(TEMP_GPR1.WReg, realRegisterMem, adrOffset, TEMP_GPR1.WReg);
+ if (indexed)
+ add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, indexReg);
+ ldr(TEMP_GPR2.WReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW));
+ rev(TEMP_GPR2.WReg, TEMP_GPR2.WReg);
+ fmov(dataSReg, TEMP_GPR2.WReg);
+
+ if (imlInstruction->op_storeLoad.flags2.notExpanded)
+ {
+ // leave value as single
+ }
+ else
+ {
+ fcvt(dataDReg, dataSReg);
+ }
+ }
+ else if (mode == PPCREC_FPR_LD_MODE_DOUBLE)
+ {
+ add_imm(TEMP_GPR1.WReg, realRegisterMem, adrOffset, TEMP_GPR1.WReg);
+ if (indexed)
+ add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, indexReg);
+ ldr(TEMP_GPR2.XReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW));
+ rev(TEMP_GPR2.XReg, TEMP_GPR2.XReg);
+ fmov(dataDReg, TEMP_GPR2.XReg);
+ }
+ else
+ {
+ return false;
+ }
+ return true;
+}
+
+// store to memory
+bool AArch64GenContext_t::fpr_store(IMLInstruction* imlInstruction, bool indexed)
+{
+ const IMLReg& dataImlReg = imlInstruction->op_storeLoad.registerData;
+ DReg dataDReg = fpReg(dataImlReg);
+ SReg dataSReg = fpReg(dataImlReg);
+ WReg memReg = gpReg(imlInstruction->op_storeLoad.registerMem);
+ WReg indexReg = indexed ? gpReg(imlInstruction->op_storeLoad.registerMem2) : wzr;
+ sint32 memOffset = imlInstruction->op_storeLoad.immS32;
+ uint8 mode = imlInstruction->op_storeLoad.mode;
+
+ if (mode == PPCREC_FPR_ST_MODE_SINGLE)
+ {
+ add_imm(TEMP_GPR1.WReg, memReg, memOffset, TEMP_GPR1.WReg);
+ if (indexed)
+ add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, indexReg);
+
+ if (imlInstruction->op_storeLoad.flags2.notExpanded)
+ {
+ // value is already in single format
+ fmov(TEMP_GPR2.WReg, dataSReg);
+ }
+ else
+ {
+ fcvt(TEMP_FPR.SReg, dataDReg);
+ fmov(TEMP_GPR2.WReg, TEMP_FPR.SReg);
+ }
+ rev(TEMP_GPR2.WReg, TEMP_GPR2.WReg);
+ str(TEMP_GPR2.WReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW));
+ }
+ else if (mode == PPCREC_FPR_ST_MODE_DOUBLE)
+ {
+ add_imm(TEMP_GPR1.WReg, memReg, memOffset, TEMP_GPR1.WReg);
+ if (indexed)
+ add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, indexReg);
+ fmov(TEMP_GPR2.XReg, dataDReg);
+ rev(TEMP_GPR2.XReg, TEMP_GPR2.XReg);
+ str(TEMP_GPR2.XReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW));
+ }
+ else if (mode == PPCREC_FPR_ST_MODE_UI32_FROM_PS0)
+ {
+ add_imm(TEMP_GPR1.WReg, memReg, memOffset, TEMP_GPR1.WReg);
+ if (indexed)
+ add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, indexReg);
+ fmov(TEMP_GPR2.WReg, dataSReg);
+ rev(TEMP_GPR2.WReg, TEMP_GPR2.WReg);
+ str(TEMP_GPR2.WReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW));
+ }
+ else
+ {
+ cemu_assert_suspicious();
+ cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_fpr_store(): Unsupported mode %d\n", mode);
+ return false;
+ }
+ return true;
+}
+
+// FPR op FPR
+void AArch64GenContext_t::fpr_r_r(IMLInstruction* imlInstruction)
+{
+ auto imlRegR = imlInstruction->op_fpr_r_r.regR;
+ auto imlRegA = imlInstruction->op_fpr_r_r.regA;
+
+ if (imlInstruction->operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT)
+ {
+ fcvtzs(gpReg(imlRegR), fpReg(imlRegA));
+ return;
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT)
+ {
+ scvtf(fpReg(imlRegR), gpReg(imlRegA));
+ return;
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
+ {
+ cemu_assert_debug(imlRegR.GetRegFormat() == IMLRegFormat::F64); // assuming target is always F64 for now
+ // exact operation depends on size of types. Floats are automatically promoted to double if the target is F64
+ DReg regFprDReg = fpReg(imlRegR);
+ SReg regFprSReg = fpReg(imlRegR);
+ if (imlRegA.GetRegFormat() == IMLRegFormat::I32)
+ {
+ fmov(regFprSReg, gpReg(imlRegA));
+ // float to double
+ fcvt(regFprDReg, regFprSReg);
+ }
+ else if (imlRegA.GetRegFormat() == IMLRegFormat::I64)
+ {
+ fmov(regFprDReg, gpReg(imlRegA));
+ }
+ else
+ {
+ cemu_assert_unimplemented();
+ }
+ return;
+ }
+
+ DReg regR = fpReg(imlRegR);
+ DReg regA = fpReg(imlRegA);
+
+ if (imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN)
+ {
+ fmov(regR, regA);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY)
+ {
+ fmul(regR, regR, regA);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE)
+ {
+ fdiv(regR, regR, regA);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD)
+ {
+ fadd(regR, regR, regA);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_SUB)
+ {
+ fsub(regR, regR, regA);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_FCTIWZ)
+ {
+ fcvtzs(regR, regA);
+ }
+ else
+ {
+ cemu_assert_suspicious();
+ }
+}
+
+void AArch64GenContext_t::fpr_r_r_r(IMLInstruction* imlInstruction)
+{
+ DReg regR = fpReg(imlInstruction->op_fpr_r_r_r.regR);
+ DReg regA = fpReg(imlInstruction->op_fpr_r_r_r.regA);
+ DReg regB = fpReg(imlInstruction->op_fpr_r_r_r.regB);
+
+ if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY)
+ {
+ fmul(regR, regA, regB);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD)
+ {
+ fadd(regR, regA, regB);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_SUB)
+ {
+ fsub(regR, regA, regB);
+ }
+ else
+ {
+ cemu_assert_suspicious();
+ }
+}
+
+/*
+ * FPR = op (fprA, fprB, fprC)
+ */
+void AArch64GenContext_t::fpr_r_r_r_r(IMLInstruction* imlInstruction)
+{
+ DReg regR = fpReg(imlInstruction->op_fpr_r_r_r_r.regR);
+ DReg regA = fpReg(imlInstruction->op_fpr_r_r_r_r.regA);
+ DReg regB = fpReg(imlInstruction->op_fpr_r_r_r_r.regB);
+ DReg regC = fpReg(imlInstruction->op_fpr_r_r_r_r.regC);
+
+ if (imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT)
+ {
+ fcmp(regA, 0.0);
+ fcsel(regR, regC, regB, Cond::GE);
+ }
+ else
+ {
+ cemu_assert_suspicious();
+ }
+}
+
+void AArch64GenContext_t::fpr_r(IMLInstruction* imlInstruction)
+{
+ DReg regRDReg = fpReg(imlInstruction->op_fpr_r.regR);
+ SReg regRSReg = fpReg(imlInstruction->op_fpr_r.regR);
+
+ if (imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE)
+ {
+ fneg(regRDReg, regRDReg);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_LOAD_ONE)
+ {
+ fmov(regRDReg, 1.0);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ABS)
+ {
+ fabs(regRDReg, regRDReg);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS)
+ {
+ fabs(regRDReg, regRDReg);
+ fneg(regRDReg, regRDReg);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM)
+ {
+ // convert to 32bit single
+ fcvt(regRSReg, regRDReg);
+ // convert back to 64bit double
+ fcvt(regRDReg, regRSReg);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64)
+ {
+ // convert bottom to 64bit double
+ fcvt(regRDReg, regRSReg);
+ }
+ else
+ {
+ cemu_assert_unimplemented();
+ }
+}
+
+Cond ImlFPCondToArm64Cond(IMLCondition cond)
+{
+ switch (cond)
+ {
+ case IMLCondition::UNORDERED_GT:
+ return Cond::GT;
+ case IMLCondition::UNORDERED_LT:
+ return Cond::MI;
+ case IMLCondition::UNORDERED_EQ:
+ return Cond::EQ;
+ case IMLCondition::UNORDERED_U:
+ return Cond::VS;
+ default:
+ {
+ cemu_assert_suspicious();
+ return Cond::EQ;
+ }
+ }
+}
+
+void AArch64GenContext_t::fpr_compare(IMLInstruction* imlInstruction)
+{
+ WReg regR = gpReg(imlInstruction->op_fpr_compare.regR);
+ DReg regA = fpReg(imlInstruction->op_fpr_compare.regA);
+ DReg regB = fpReg(imlInstruction->op_fpr_compare.regB);
+ auto cond = ImlFPCondToArm64Cond(imlInstruction->op_fpr_compare.cond);
+ fcmp(regA, regB);
+ cset(regR, cond);
+}
+
+void AArch64GenContext_t::call_imm(IMLInstruction* imlInstruction)
+{
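+ // x30 (the link register) is clobbered by blr, so spill and restore it around the call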
+ str(x30, AdrPreImm(sp, -16));
+ mov(TEMP_GPR1.XReg, imlInstruction->op_call_imm.callAddress);
+ blr(TEMP_GPR1.XReg);
+ ldr(x30, AdrPostImm(sp, 16));
+}
+
+bool PPCRecompiler_generateAArch64Code(struct PPCRecFunction_t* PPCRecFunction, struct ppcImlGenContext_t* ppcImlGenContext)
+{
+ AArch64Allocator allocator;
+ AArch64GenContext_t aarch64GenContext{&allocator};
+
+ // generate iml instruction code
+ bool codeGenerationFailed = false;
+ for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
+ {
+ if (codeGenerationFailed)
+ break;
+ segIt->x64Offset = aarch64GenContext.getSize();
+
+ aarch64GenContext.storeSegmentStart(segIt);
+
+ for (size_t i = 0; i < segIt->imlList.size(); i++)
+ {
+ IMLInstruction* imlInstruction = segIt->imlList.data() + i;
+ if (imlInstruction->type == PPCREC_IML_TYPE_R_NAME)
+ {
+ aarch64GenContext.r_name(imlInstruction);
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_NAME_R)
+ {
+ aarch64GenContext.name_r(imlInstruction);
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_R_R)
+ {
+ if (!aarch64GenContext.r_r(imlInstruction))
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32)
+ {
+ if (!aarch64GenContext.r_s32(imlInstruction))
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32)
+ {
+ if (!aarch64GenContext.r_r_s32(imlInstruction))
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32_CARRY)
+ {
+ if (!aarch64GenContext.r_r_s32_carry(imlInstruction))
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R)
+ {
+ if (!aarch64GenContext.r_r_r(imlInstruction))
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R_CARRY)
+ {
+ if (!aarch64GenContext.r_r_r_carry(imlInstruction))
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE)
+ {
+ aarch64GenContext.compare(imlInstruction);
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32)
+ {
+ aarch64GenContext.compare_s32(imlInstruction);
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
+ {
+ aarch64GenContext.cjump(imlInstruction, segIt);
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_JUMP)
+ {
+ aarch64GenContext.jump(segIt);
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
+ {
+ aarch64GenContext.conditionalJumpCycleCheck(segIt);
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_MACRO)
+ {
+ if (!aarch64GenContext.macro(imlInstruction))
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_LOAD)
+ {
+ if (!aarch64GenContext.load(imlInstruction, false))
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_LOAD_INDEXED)
+ {
+ if (!aarch64GenContext.load(imlInstruction, true))
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_STORE)
+ {
+ if (!aarch64GenContext.store(imlInstruction, false))
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED)
+ {
+ if (!aarch64GenContext.store(imlInstruction, true))
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
+ {
+ aarch64GenContext.atomic_cmp_store(imlInstruction);
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_CALL_IMM)
+ {
+ aarch64GenContext.call_imm(imlInstruction);
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_NO_OP)
+ {
+ // no op
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD)
+ {
+ if (!aarch64GenContext.fpr_load(imlInstruction, false))
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
+ {
+ if (!aarch64GenContext.fpr_load(imlInstruction, true))
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE)
+ {
+ if (!aarch64GenContext.fpr_store(imlInstruction, false))
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
+ {
+ if (!aarch64GenContext.fpr_store(imlInstruction, true))
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R)
+ {
+ aarch64GenContext.fpr_r_r(imlInstruction);
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R)
+ {
+ aarch64GenContext.fpr_r_r_r(imlInstruction);
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R)
+ {
+ aarch64GenContext.fpr_r_r_r_r(imlInstruction);
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R)
+ {
+ aarch64GenContext.fpr_r(imlInstruction);
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_COMPARE)
+ {
+ aarch64GenContext.fpr_compare(imlInstruction);
+ }
+ else
+ {
+ codeGenerationFailed = true;
+ cemu_assert_suspicious();
+ cemuLog_log(LogType::Recompiler, "PPCRecompiler_generateAArch64Code(): Unsupported iml type {}", imlInstruction->type);
+ }
+ }
+ }
+
+ // handle failed code generation
+ if (codeGenerationFailed)
+ {
+ return false;
+ }
+
+ if (!aarch64GenContext.processAllJumps())
+ {
+ cemuLog_log(LogType::Recompiler, "PPCRecompiler_generateAArch64Code(): some jumps exceeded the +/-128MB offset.");
+ return false;
+ }
+
+ aarch64GenContext.readyRE();
+
+ // set code
+ PPCRecFunction->x86Code = aarch64GenContext.getCode();
+ PPCRecFunction->x86Size = aarch64GenContext.getMaxSize();
+	// disable freeing so the CodeGenerator destructor does not release the generated code
+ allocator.setFreeDisabled(true);
+ return true;
+}
+
+void PPCRecompiler_cleanupAArch64Code(void* code, size_t size)
+{
+ AArch64Allocator allocator;
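+	// if the allocator protected the code region, make it writable again before freeing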
+ if (allocator.useProtect())
+ CodeArray::protect(code, size, CodeArray::PROTECT_RW);
+	allocator.free(static_cast<uint8*>(code));
+}
+
+void AArch64GenContext_t::enterRecompilerCode()
+{
+ constexpr size_t STACK_SIZE = 160 /* x19 .. x30 + v8.d[0] .. v15.d[0] */;
+ static_assert(STACK_SIZE % 16 == 0);
+ sub(sp, sp, STACK_SIZE);
+ mov(x9, sp);
+
+ stp(x19, x20, AdrPostImm(x9, 16));
+ stp(x21, x22, AdrPostImm(x9, 16));
+ stp(x23, x24, AdrPostImm(x9, 16));
+ stp(x25, x26, AdrPostImm(x9, 16));
+ stp(x27, x28, AdrPostImm(x9, 16));
+ stp(x29, x30, AdrPostImm(x9, 16));
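+	// AAPCS64 only requires the low 64 bits of v8-v15 to be preserved, so only lane 0 of each register is stored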
+ st4((v8.d - v11.d)[0], AdrPostImm(x9, 32));
+ st4((v12.d - v15.d)[0], AdrPostImm(x9, 32));
+ mov(HCPU_REG, x1); // call argument 2
+ mov(PPC_REC_INSTANCE_REG, (uint64)ppcRecompilerInstanceData);
+ mov(MEM_BASE_REG, (uint64)memory_base);
+
+ // branch to recFunc
+ blr(x0); // call argument 1
+
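+	// restore callee-saved registers and return to the caller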
+ mov(x9, sp);
+ ldp(x19, x20, AdrPostImm(x9, 16));
+ ldp(x21, x22, AdrPostImm(x9, 16));
+ ldp(x23, x24, AdrPostImm(x9, 16));
+ ldp(x25, x26, AdrPostImm(x9, 16));
+ ldp(x27, x28, AdrPostImm(x9, 16));
+ ldp(x29, x30, AdrPostImm(x9, 16));
+ ld4((v8.d - v11.d)[0], AdrPostImm(x9, 32));
+ ld4((v12.d - v15.d)[0], AdrPostImm(x9, 32));
+
+ add(sp, sp, STACK_SIZE);
+
+ ret();
+}
+
+void AArch64GenContext_t::leaveRecompilerCode()
+{
+ str(LR.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, instructionPointer)));
+ ret();
+}
+
+bool initializedInterfaceFunctions = false;
+AArch64GenContext_t enterRecompilerCode_ctx{};
+
+AArch64GenContext_t leaveRecompilerCode_unvisited_ctx{};
+AArch64GenContext_t leaveRecompilerCode_visited_ctx{};
+void PPCRecompilerAArch64Gen_generateRecompilerInterfaceFunctions()
+{
+ if (initializedInterfaceFunctions)
+ return;
+ initializedInterfaceFunctions = true;
+
+ enterRecompilerCode_ctx.enterRecompilerCode();
+ enterRecompilerCode_ctx.readyRE();
+ PPCRecompiler_enterRecompilerCode = enterRecompilerCode_ctx.getCode();
+
+ leaveRecompilerCode_unvisited_ctx.leaveRecompilerCode();
+ leaveRecompilerCode_unvisited_ctx.readyRE();
+ PPCRecompiler_leaveRecompilerCode_unvisited = leaveRecompilerCode_unvisited_ctx.getCode();
+
+ leaveRecompilerCode_visited_ctx.leaveRecompilerCode();
+ leaveRecompilerCode_visited_ctx.readyRE();
+ PPCRecompiler_leaveRecompilerCode_visited = leaveRecompilerCode_visited_ctx.getCode();
+}
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h b/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h
new file mode 100644
index 00000000..b610ee04
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "HW/Espresso/Recompiler/IML/IMLInstruction.h"
+#include "../PPCRecompiler.h"
+
+bool PPCRecompiler_generateAArch64Code(struct PPCRecFunction_t* PPCRecFunction, struct ppcImlGenContext_t* ppcImlGenContext);
+void PPCRecompiler_cleanupAArch64Code(void* code, size_t size);
+
+void PPCRecompilerAArch64Gen_generateRecompilerInterfaceFunctions();
+
+// architecture specific constants
+namespace IMLArchAArch64
+{
+ static constexpr int PHYSREG_GPR_BASE = 0;
+ static constexpr int PHYSREG_GPR_COUNT = 25;
+ static constexpr int PHYSREG_FPR_BASE = PHYSREG_GPR_COUNT;
+ static constexpr int PHYSREG_FPR_COUNT = 31;
+}; // namespace IMLArchAArch64
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
new file mode 100644
index 00000000..eadb80fb
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
@@ -0,0 +1,1672 @@
+#include "Cafe/HW/Espresso/PPCState.h"
+#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
+#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterHelper.h"
+#include "../PPCRecompiler.h"
+#include "../PPCRecompilerIml.h"
+#include "BackendX64.h"
+#include "Cafe/OS/libs/coreinit/coreinit_Time.h"
+#include "util/MemMapper/MemMapper.h"
+#include "Common/cpu_features.h"
+#include <boost/container/static_vector.hpp>
+
+static x86Assembler64::GPR32 _reg32(IMLReg physReg)
+{
+ cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I32);
+ IMLRegID regId = physReg.GetRegID();
+ cemu_assert_debug(regId < 16);
+ return (x86Assembler64::GPR32)regId;
+}
+
+static uint32 _reg64(IMLReg physReg)
+{
+ cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I64);
+ IMLRegID regId = physReg.GetRegID();
+ cemu_assert_debug(regId < 16);
+ return regId;
+}
+
+uint32 _regF64(IMLReg physReg)
+{
+ cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::F64);
+ IMLRegID regId = physReg.GetRegID();
+ cemu_assert_debug(regId >= IMLArchX86::PHYSREG_FPR_BASE && regId < IMLArchX86::PHYSREG_FPR_BASE+16);
+ regId -= IMLArchX86::PHYSREG_FPR_BASE;
+ return regId;
+}
+
+static x86Assembler64::GPR8_REX _reg8(IMLReg physReg)
+{
+ cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I32); // for now these are represented as 32bit
+ return (x86Assembler64::GPR8_REX)physReg.GetRegID();
+}
+
+static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId)
+{
+ return (x86Assembler64::GPR32)regId;
+}
+
+static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId)
+{
+ return (x86Assembler64::GPR8_REX)regId;
+}
+
+static x86Assembler64::GPR8_REX _reg8_from_reg64(uint32 regId)
+{
+ return (x86Assembler64::GPR8_REX)regId;
+}
+
+static x86Assembler64::GPR64 _reg64_from_reg32(x86Assembler64::GPR32 regId)
+{
+ return (x86Assembler64::GPR64)regId;
+}
+
+X86Cond _x86Cond(IMLCondition imlCond)
+{
+ switch (imlCond)
+ {
+ case IMLCondition::EQ:
+ return X86_CONDITION_Z;
+ case IMLCondition::NEQ:
+ return X86_CONDITION_NZ;
+ case IMLCondition::UNSIGNED_GT:
+ return X86_CONDITION_NBE;
+ case IMLCondition::UNSIGNED_LT:
+ return X86_CONDITION_B;
+ case IMLCondition::SIGNED_GT:
+ return X86_CONDITION_NLE;
+ case IMLCondition::SIGNED_LT:
+ return X86_CONDITION_L;
+ default:
+ break;
+ }
+ cemu_assert_suspicious();
+ return X86_CONDITION_Z;
+}
+
+X86Cond _x86CondInverted(IMLCondition imlCond)
+{
+ switch (imlCond)
+ {
+ case IMLCondition::EQ:
+ return X86_CONDITION_NZ;
+ case IMLCondition::NEQ:
+ return X86_CONDITION_Z;
+ case IMLCondition::UNSIGNED_GT:
+ return X86_CONDITION_BE;
+ case IMLCondition::UNSIGNED_LT:
+ return X86_CONDITION_NB;
+ case IMLCondition::SIGNED_GT:
+ return X86_CONDITION_LE;
+ case IMLCondition::SIGNED_LT:
+ return X86_CONDITION_NL;
+ default:
+ break;
+ }
+ cemu_assert_suspicious();
+ return X86_CONDITION_Z;
+}
+
+X86Cond _x86Cond(IMLCondition imlCond, bool condIsInverted)
+{
+ if (condIsInverted)
+ return _x86CondInverted(imlCond);
+ return _x86Cond(imlCond);
+}
+
+/*
+* Remember the current instruction output offset for relocation
+* The instruction generated right after this call will be adjusted later
+*/
+void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContext, void* extraInfo = nullptr)
+{
+ x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->emitter->GetWriteIndex(), extraInfo);
+}
+
+void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset)
+{
+ uint8* instructionData = x64GenContext->emitter->GetBufferPtr() + jumpInstructionOffset;
+ if (instructionData[0] == 0x0F && (instructionData[1] >= 0x80 && instructionData[1] <= 0x8F))
+ {
+ // far conditional jump
+ *(uint32*)(instructionData + 2) = (destinationOffset - (jumpInstructionOffset + 6));
+ }
+ else if (instructionData[0] >= 0x70 && instructionData[0] <= 0x7F)
+ {
+ // short conditional jump
+ sint32 distance = (sint32)((destinationOffset - (jumpInstructionOffset + 2)));
+ cemu_assert_debug(distance >= -128 && distance <= 127);
+ *(uint8*)(instructionData + 1) = (uint8)distance;
+ }
+ else if (instructionData[0] == 0xE9)
+ {
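+		// 0xE9 = near JMP with a 32-bit relative displacement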
+ *(uint32*)(instructionData + 1) = (destinationOffset - (jumpInstructionOffset + 5));
+ }
+ else if (instructionData[0] == 0xEB)
+ {
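+		// 0xEB = short JMP with an 8-bit relative displacement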
+ sint32 distance = (sint32)((destinationOffset - (jumpInstructionOffset + 2)));
+ cemu_assert_debug(distance >= -128 && distance <= 127);
+ *(uint8*)(instructionData + 1) = (uint8)distance;
+ }
+ else
+ {
+ assert_dbg();
+ }
+}
+
+void* ATTR_MS_ABI PPCRecompiler_virtualHLE(PPCInterpreter_t* hCPU, uint32 hleFuncId)
+{
+ void* prevRSPTemp = hCPU->rspTemp;
+ if( hleFuncId == 0xFFD0 )
+ {
+		hCPU->remainingCycles -= 500; // subtract roughly 500 cycles for each HLE call
+ hCPU->gpr[3] = 0;
+ PPCInterpreter_nextInstruction(hCPU);
+ return hCPU;
+ }
+ else
+ {
+ auto hleCall = PPCInterpreter_getHLECall(hleFuncId);
+ cemu_assert(hleCall != nullptr);
+ hleCall(hCPU);
+ }
+ hCPU->rspTemp = prevRSPTemp;
+ return PPCInterpreter_getCurrentInstance();
+}
+
+bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+ if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG)
+ {
+ //x64Gen_int3(x64GenContext);
+ uint32 branchDstReg = _reg32(imlInstruction->op_macro.paramReg);
+ if(X86_REG_RDX != branchDstReg)
+ x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RDX, branchDstReg);
+ // potential optimization: Use branchDstReg directly if possible instead of moving to RDX/EDX
+ // JMP [offset+RDX*(8/4)+R15]
+ x64Gen_writeU8(x64GenContext, 0x41);
+ x64Gen_writeU8(x64GenContext, 0xFF);
+ x64Gen_writeU8(x64GenContext, 0xA4);
+ x64Gen_writeU8(x64GenContext, 0x57);
+ x64Gen_writeU32(x64GenContext, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
+ return true;
+ }
+ else if( imlInstruction->operation == PPCREC_IML_MACRO_BL )
+ {
+ // MOV DWORD [SPR_LinkRegister], newLR
+ uint32 newLR = imlInstruction->op_macro.param + 4;
+ x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.LR), newLR);
+ // remember new instruction pointer in RDX
+ uint32 newIP = imlInstruction->op_macro.param2;
+ x64Gen_mov_reg64Low32_imm32(x64GenContext, X86_REG_RDX, newIP);
+ // since RDX is constant we can use JMP [R15+const_offset] if jumpTableOffset+RDX*2 does not exceed the 2GB boundary
+ uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL;
+ if (lookupOffset >= 0x80000000ULL)
+ {
+ // JMP [offset+RDX*(8/4)+R15]
+ x64Gen_writeU8(x64GenContext, 0x41);
+ x64Gen_writeU8(x64GenContext, 0xFF);
+ x64Gen_writeU8(x64GenContext, 0xA4);
+ x64Gen_writeU8(x64GenContext, 0x57);
+ x64Gen_writeU32(x64GenContext, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
+ }
+ else
+ {
+ x64Gen_writeU8(x64GenContext, 0x41);
+ x64Gen_writeU8(x64GenContext, 0xFF);
+ x64Gen_writeU8(x64GenContext, 0xA7);
+ x64Gen_writeU32(x64GenContext, (uint32)lookupOffset);
+ }
+ return true;
+ }
+ else if( imlInstruction->operation == PPCREC_IML_MACRO_B_FAR )
+ {
+ // remember new instruction pointer in RDX
+ uint32 newIP = imlInstruction->op_macro.param2;
+ x64Gen_mov_reg64Low32_imm32(x64GenContext, X86_REG_RDX, newIP);
+ // Since RDX is constant we can use JMP [R15+const_offset] if jumpTableOffset+RDX*2 does not exceed the 2GB boundary
+ uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL;
+ if (lookupOffset >= 0x80000000ULL)
+ {
+ // JMP [offset+RDX*(8/4)+R15]
+ x64Gen_writeU8(x64GenContext, 0x41);
+ x64Gen_writeU8(x64GenContext, 0xFF);
+ x64Gen_writeU8(x64GenContext, 0xA4);
+ x64Gen_writeU8(x64GenContext, 0x57);
+ x64Gen_writeU32(x64GenContext, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
+ }
+ else
+ {
+ x64Gen_writeU8(x64GenContext, 0x41);
+ x64Gen_writeU8(x64GenContext, 0xFF);
+ x64Gen_writeU8(x64GenContext, 0xA7);
+ x64Gen_writeU32(x64GenContext, (uint32)lookupOffset);
+ }
+ return true;
+ }
+ else if( imlInstruction->operation == PPCREC_IML_MACRO_LEAVE )
+ {
+ uint32 currentInstructionAddress = imlInstruction->op_macro.param;
+ // remember PC value in REG_EDX
+ x64Gen_mov_reg64Low32_imm32(x64GenContext, X86_REG_RDX, currentInstructionAddress);
+
+ uint32 newIP = 0; // special value for recompiler exit
+		uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL;
+ // JMP [R15+offset]
+ x64Gen_writeU8(x64GenContext, 0x41);
+ x64Gen_writeU8(x64GenContext, 0xFF);
+ x64Gen_writeU8(x64GenContext, 0xA7);
+ x64Gen_writeU32(x64GenContext, (uint32)lookupOffset);
+ return true;
+ }
+ else if( imlInstruction->operation == PPCREC_IML_MACRO_DEBUGBREAK )
+ {
+ x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, imlInstruction->op_macro.param2);
+ x64Gen_int3(x64GenContext);
+ return true;
+ }
+ else if( imlInstruction->operation == PPCREC_IML_MACRO_COUNT_CYCLES )
+ {
+ uint32 cycleCount = imlInstruction->op_macro.param;
+ x64Gen_sub_mem32reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, remainingCycles), cycleCount);
+ return true;
+ }
+ else if( imlInstruction->operation == PPCREC_IML_MACRO_HLE )
+ {
+ uint32 ppcAddress = imlInstruction->op_macro.param;
+ uint32 funcId = imlInstruction->op_macro.param2;
+ // update instruction pointer
+ x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer), ppcAddress);
+ // set parameters
+ x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RCX, REG_RESV_HCPU);
+ x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RDX, funcId);
+ // restore stackpointer from hCPU->rspTemp
+ x64Emit_mov_reg64_mem64(x64GenContext, X86_REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp));
+ // reserve space on stack for call parameters
+		x64Gen_sub_reg64_imm32(x64GenContext, X86_REG_RSP, 8*11); // must be an uneven number of 8-byte slots to keep the stack 16-byte (0x10) aligned
+ x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RBP, 0);
+ // call HLE function
+ x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RAX, (uint64)PPCRecompiler_virtualHLE);
+ x64Gen_call_reg64(x64GenContext, X86_REG_RAX);
+ // restore RSP to hCPU (from RAX, result of PPCRecompiler_virtualHLE)
+ x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_HCPU, X86_REG_RAX);
+ // MOV R15, ppcRecompilerInstanceData
+ x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_RECDATA, (uint64)ppcRecompilerInstanceData);
+ // MOV R13, memory_base
+ x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_MEMBASE, (uint64)memory_base);
+		// check if cycles were decreased beyond zero, if yes -> leave recompiler
+ x64Gen_bt_mem8(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative
+ sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
+ x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_CARRY, 0);
+
+ x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_RDX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer));
+ // set EAX to 0 (we assume that ppcRecompilerDirectJumpTable[0] will be a recompiler escape function)
+ x64Gen_xor_reg32_reg32(x64GenContext, X86_REG_RAX, X86_REG_RAX);
+ // ADD RAX, REG_RESV_RECDATA
+ x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, REG_RESV_RECDATA);
+ // JMP [recompilerCallTable+EAX/4*8]
+ x64Gen_jmp_memReg64(x64GenContext, X86_REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
+ PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
+ // check if instruction pointer was changed
+ // assign new instruction pointer to EAX
+ x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_RAX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer));
+ // remember instruction pointer in REG_EDX
+ x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RDX, X86_REG_RAX);
+ // EAX *= 2
+ x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, X86_REG_RAX);
+ // ADD RAX, REG_RESV_RECDATA
+ x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, REG_RESV_RECDATA);
+ // JMP [ppcRecompilerDirectJumpTable+RAX/4*8]
+ x64Gen_jmp_memReg64(x64GenContext, X86_REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
+ return true;
+ }
+ else
+ {
+ debug_printf("Unknown recompiler macro operation %d\n", imlInstruction->operation);
+ assert_dbg();
+ }
+ return false;
+}
+
+/*
+* Load from memory
+*/
+bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
+{
+ cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32);
+ cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32);
+ if (indexed)
+ cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32);
+
+ IMLRegID realRegisterData = imlInstruction->op_storeLoad.registerData.GetRegID();
+ IMLRegID realRegisterMem = imlInstruction->op_storeLoad.registerMem.GetRegID();
+ IMLRegID realRegisterMem2 = PPC_REC_INVALID_REGISTER;
+ if( indexed )
+ realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2.GetRegID();
+ if( indexed && realRegisterMem == realRegisterMem2 )
+ {
+ return false;
+ }
+ if( indexed && realRegisterData == realRegisterMem2 )
+ {
+ // for indexed memory access realRegisterData must not be the same register as the second memory register,
+ // this can easily be worked around by swapping realRegisterMem and realRegisterMem2
+ std::swap(realRegisterMem, realRegisterMem2);
+ }
+
+ bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend;
+ bool switchEndian = imlInstruction->op_storeLoad.flags2.swapEndian;
+ if( imlInstruction->op_storeLoad.copyWidth == 32 )
+ {
+ if (indexed)
+ {
+ x64Gen_lea_reg64Low32_reg64Low32PlusReg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem, realRegisterMem2);
+ }
+ if( g_CPUFeatures.x86.movbe && switchEndian )
+ {
+ if (indexed)
+ {
+ x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
+ }
+ else
+ {
+ x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
+ }
+ }
+ else
+ {
+ if (indexed)
+ {
+ x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
+ if (switchEndian)
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData);
+ }
+ else
+ {
+ x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
+ if (switchEndian)
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData);
+ }
+ }
+ }
+ else if( imlInstruction->op_storeLoad.copyWidth == 16 )
+ {
+ if (indexed)
+ {
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ }
+ if(g_CPUFeatures.x86.movbe && switchEndian )
+ {
+ x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
+ if( indexed && realRegisterMem != realRegisterData )
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ }
+ else
+ {
+ x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
+ if( indexed && realRegisterMem != realRegisterData )
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ if( switchEndian )
+ x64Gen_rol_reg64Low16_imm8(x64GenContext, realRegisterData, 8);
+ }
+ if( signExtend )
+ x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, realRegisterData, realRegisterData);
+ else
+ x64Gen_movZeroExtend_reg64Low32_reg64Low16(x64GenContext, realRegisterData, realRegisterData);
+ }
+ else if( imlInstruction->op_storeLoad.copyWidth == 8 )
+ {
+ if( indexed )
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ if( signExtend )
+ x64Gen_movSignExtend_reg64Low32_mem8Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
+ else
+ x64Emit_movZX_reg32_mem8(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
+ if( indexed && realRegisterMem != realRegisterData )
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ }
+ else
+ return false;
+ return true;
+}
+
+/*
+* Write to memory
+*/
+bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
+{
+ cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32);
+ cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32);
+ if (indexed)
+ cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32);
+
+ IMLRegID realRegisterData = imlInstruction->op_storeLoad.registerData.GetRegID();
+ IMLRegID realRegisterMem = imlInstruction->op_storeLoad.registerMem.GetRegID();
+ IMLRegID realRegisterMem2 = PPC_REC_INVALID_REGISTER;
+ if (indexed)
+ realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2.GetRegID();
+
+ if (indexed && realRegisterMem == realRegisterMem2)
+ {
+ return false;
+ }
+ if (indexed && realRegisterData == realRegisterMem2)
+ {
+ // for indexed memory access realRegisterData must not be the same register as the second memory register,
+ // this can easily be worked around by swapping realRegisterMem and realRegisterMem2
+ std::swap(realRegisterMem, realRegisterMem2);
+ }
+
+ bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend;
+ bool swapEndian = imlInstruction->op_storeLoad.flags2.swapEndian;
+ if (imlInstruction->op_storeLoad.copyWidth == 32)
+ {
+ uint32 valueRegister;
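+		// store directly from the data register when no manual byteswap is needed and it does not alias the address register; otherwise go through REG_RESV_TEMP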
+ if ((swapEndian == false || g_CPUFeatures.x86.movbe) && realRegisterMem != realRegisterData)
+ {
+ valueRegister = realRegisterData;
+ }
+ else
+ {
+ x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
+ valueRegister = REG_RESV_TEMP;
+ }
+ if (!g_CPUFeatures.x86.movbe && swapEndian)
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, valueRegister);
+ if (indexed)
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ if (g_CPUFeatures.x86.movbe && swapEndian)
+ x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister);
+ else
+ x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister);
+ if (indexed)
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ }
+ else if (imlInstruction->op_storeLoad.copyWidth == 16)
+ {
+ x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
+ if (swapEndian)
+ x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8);
+ if (indexed)
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
+ if (indexed)
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ // todo: Optimize this, e.g. by using MOVBE
+ }
+ else if (imlInstruction->op_storeLoad.copyWidth == 8)
+ {
+ if (indexed && realRegisterMem == realRegisterData)
+ {
+ x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
+ realRegisterData = REG_RESV_TEMP;
+ }
+ if (indexed)
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, realRegisterData);
+ if (indexed)
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ }
+ else
+ return false;
+ return true;
+}
+
+void PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+ auto regBoolOut = _reg32_from_reg8(_reg8(imlInstruction->op_atomic_compare_store.regBoolOut));
+ auto regEA = _reg32(imlInstruction->op_atomic_compare_store.regEA);
+ auto regVal = _reg32(imlInstruction->op_atomic_compare_store.regWriteValue);
+ auto regCmp = _reg32(imlInstruction->op_atomic_compare_store.regCompareValue);
+
+ cemu_assert_debug(regBoolOut == X86_REG_EAX);
+ cemu_assert_debug(regEA != X86_REG_EAX);
+ cemu_assert_debug(regVal != X86_REG_EAX);
+ cemu_assert_debug(regCmp != X86_REG_EAX);
+
+ x64GenContext->emitter->MOV_dd(X86_REG_EAX, regCmp);
+ x64GenContext->emitter->LockPrefix();
+ x64GenContext->emitter->CMPXCHG_dd_l(REG_RESV_MEMBASE, 0, _reg64_from_reg32(regEA), 1, regVal);
+ x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_Z, regBoolOut);
+ x64GenContext->emitter->AND_di32(regBoolOut, 1); // SETcc doesn't clear the upper bits so we do it manually here
+}
+
+void PPCRecompilerX64Gen_imlInstruction_call_imm(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+ // the register allocator takes care of spilling volatile registers and moving parameters to the right registers, so we don't need to do any special handling here
+ x64GenContext->emitter->SUB_qi8(X86_REG_RSP, 0x20); // reserve enough space for any parameters while keeping stack alignment of 16 intact
+ x64GenContext->emitter->MOV_qi64(X86_REG_RAX, imlInstruction->op_call_imm.callAddress);
+ x64GenContext->emitter->CALL_q(X86_REG_RAX);
+ x64GenContext->emitter->ADD_qi8(X86_REG_RSP, 0x20);
+ // a note about the stack pointer:
+ // currently the code generated by generateEnterRecompilerCode makes sure the stack is 16 byte aligned, so we don't need to fix it up here
+}
+
+bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+ auto regR = _reg32(imlInstruction->op_r_r.regR);
+ auto regA = _reg32(imlInstruction->op_r_r.regA);
+
+ if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)
+ {
+ // registerResult = registerA
+ if (regR != regA)
+ x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP)
+ {
+ if (regA != regR)
+ x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA); // if movbe is available we can move and swap in a single instruction?
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, regR);
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32 )
+ {
+ x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, regR, regA);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32)
+ {
+ x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, regR, reg32ToReg16(regA));
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_NOT )
+ {
+ // copy register content if different registers
+ if( regR != regA )
+ x64Gen_mov_reg64_reg64(x64GenContext, regR, regA);
+ x64Gen_not_reg64Low32(x64GenContext, regR);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_NEG)
+ {
+ // copy register content if different registers
+ if (regR != regA)
+ x64Gen_mov_reg64_reg64(x64GenContext, regR, regA);
+ x64Gen_neg_reg64Low32(x64GenContext, regR);
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_CNTLZW )
+ {
+ // count leading zeros
+		// LZCNT instruction (part of ABM, CPUID.80000001H:ECX.ABM[Bit 5])
+ if(g_CPUFeatures.x86.lzcnt)
+ {
+ x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, regR, regA);
+ }
+ else
+ {
+ x64Gen_test_reg64Low32_reg64Low32(x64GenContext, regA, regA);
+ sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
+ x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0);
+ x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, regR, regA);
+ x64Gen_neg_reg64Low32(x64GenContext, regR);
+ x64Gen_add_reg64Low32_imm32(x64GenContext, regR, 32-1);
+ sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex();
+ x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
+ PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
+ x64Gen_mov_reg64Low32_imm32(x64GenContext, regR, 32);
+ PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
+ }
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_X86_CMP)
+ {
+ x64GenContext->emitter->CMP_dd(regR, regA);
+ }
+ else
+ {
+ cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation);
+ return false;
+ }
+ return true;
+}
+
+bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+ auto regR = _reg32(imlInstruction->op_r_immS32.regR);
+
+ if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN )
+ {
+ x64Gen_mov_reg64Low32_imm32(x64GenContext, regR, (uint32)imlInstruction->op_r_immS32.immS32);
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE )
+ {
+ cemu_assert_debug((imlInstruction->op_r_immS32.immS32 & 0x80) == 0);
+ x64Gen_rol_reg64Low32_imm8(x64GenContext, regR, (uint8)imlInstruction->op_r_immS32.immS32);
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_X86_CMP)
+ {
+ sint32 imm = imlInstruction->op_r_immS32.immS32;
+ x64GenContext->emitter->CMP_di32(regR, imm);
+ }
+ else
+ {
+ cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation);
+ return false;
+ }
+ return true;
+}
+
+bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+ auto rRegResult = _reg32(imlInstruction->op_r_r_r.regR);
+ auto rRegOperand1 = _reg32(imlInstruction->op_r_r_r.regA);
+ auto rRegOperand2 = _reg32(imlInstruction->op_r_r_r.regB);
+
+ if (imlInstruction->operation == PPCREC_IML_OP_ADD)
+ {
+ // registerResult = registerOperand1 + registerOperand2
+ if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) )
+ {
+ // be careful not to overwrite the operand before we use it
+ if( rRegResult == rRegOperand1 )
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
+ else
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1);
+ }
+ else
+ {
+ // copy operand1 to destination register before doing addition
+ x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
+ }
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_SUB )
+ {
+ if( rRegOperand1 == rRegOperand2 )
+ {
+ // result = operand1 - operand1 -> 0
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult);
+ }
+ else if( rRegResult == rRegOperand1 )
+ {
+ // result = result - operand2
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
+ }
+ else if ( rRegResult == rRegOperand2 )
+ {
+ // result = operand1 - result
+ x64Gen_neg_reg64Low32(x64GenContext, rRegResult);
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1);
+ }
+ else
+ {
+ x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
+ }
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR)
+ {
+ if (rRegResult == rRegOperand2)
+ std::swap(rRegOperand1, rRegOperand2);
+
+ if (rRegResult != rRegOperand1)
+ x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
+
+ if (imlInstruction->operation == PPCREC_IML_OP_OR)
+ x64Gen_or_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
+ else if (imlInstruction->operation == PPCREC_IML_OP_AND)
+ x64Gen_and_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
+ else
+ x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED )
+ {
+ // registerResult = registerOperand1 * registerOperand2
+ if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) )
+ {
+ // be careful not to overwrite the operand before we use it
+ if( rRegResult == rRegOperand1 )
+ x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
+ else
+ x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1);
+ }
+ else
+ {
+ // copy operand1 to destination register before doing multiplication
+ x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
+ // add operand2
+ x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
+ }
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_SLW || imlInstruction->operation == PPCREC_IML_OP_SRW )
+ {
+		// registerResult = registerOperand1(rA) shifted left (SLW) or right (SRW) by registerOperand2(rB) (shift amount up to 63 bits)
+
+ if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SRW)
+ {
+ // use BMI2 SHRX if available
+ x64Gen_shrx_reg64_reg64_reg64(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
+ }
+ else if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SLW)
+ {
+ // use BMI2 SHLX if available
+ x64Gen_shlx_reg64_reg64_reg64(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
+ x64Gen_and_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); // trim result to 32bit
+ }
+ else
+ {
+ // lazy and slow way to do shift by register without relying on ECX/CL or BMI2
+ x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1);
+ for (sint32 b = 0; b < 6; b++)
+ {
+ x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1 << b));
+ sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex();
+ x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set
+ if (b == 5)
+ {
+ x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
+ }
+ else
+ {
+ if (imlInstruction->operation == PPCREC_IML_OP_SLW)
+ x64Gen_shl_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1 << b));
+ else
+ x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1 << b));
+ }
+ PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex());
+ }
+ x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP);
+ }
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE )
+ {
+ // todo: Use BMI2 rotate if available
+ // check if CL/ECX/RCX is available
+ if( rRegResult != X86_REG_RCX && rRegOperand1 != X86_REG_RCX && rRegOperand2 != X86_REG_RCX )
+ {
+ // swap operand 2 with RCX
+ x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2);
+ // move operand 1 to temp register
+ x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1);
+ // rotate
+ x64Gen_rol_reg64Low32_cl(x64GenContext, REG_RESV_TEMP);
+ // undo swap operand 2 with RCX
+ x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2);
+ // copy to result register
+ x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP);
+ }
+ else
+ {
+ x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1);
+ // lazy and slow way to do shift by register without relying on ECX/CL
+ for(sint32 b=0; b<5; b++)
+ {
+				x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1<<b));
+				sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex();
+ x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set
+				x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<<b));
+				PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex());
+ }
+ x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP);
+ }
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S ||
+ imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U ||
+ imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
+ {
+ if(g_CPUFeatures.x86.bmi2)
+ {
+ if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
+ x64Gen_sarx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
+ else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
+ x64Gen_shrx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
+ else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
+ x64Gen_shlx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
+ }
+ else
+ {
+ cemu_assert_debug(rRegOperand2 == X86_REG_ECX);
+ bool useTempReg = rRegResult == X86_REG_ECX && rRegOperand1 != X86_REG_ECX;
+ auto origRegResult = rRegResult;
+ if(useTempReg)
+ {
+ x64GenContext->emitter->MOV_dd(REG_RESV_TEMP, rRegOperand1);
+ rRegResult = REG_RESV_TEMP;
+ }
+ if(rRegOperand1 != rRegResult)
+ x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
+ if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
+ x64GenContext->emitter->SAR_d_CL(rRegResult);
+ else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
+ x64GenContext->emitter->SHR_d_CL(rRegResult);
+ else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
+ x64GenContext->emitter->SHL_d_CL(rRegResult);
+ if(useTempReg)
+ x64GenContext->emitter->MOV_dd(origRegResult, REG_RESV_TEMP);
+ }
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED )
+ {
+ x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX);
+ x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX);
+ // mov operand 2 to temp register
+ x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2);
+ // mov operand1 to EAX
+ x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, X86_REG_EAX, rRegOperand1);
+ // sign or zero extend EAX to EDX:EAX based on division sign mode
+ if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED )
+ x64Gen_cdq(x64GenContext);
+ else
+ x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, X86_REG_EDX, X86_REG_EDX);
+ // make sure we avoid division by zero
+ x64Gen_test_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
+ x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 3);
+ // divide
+ if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED )
+ x64Gen_idiv_reg64Low32(x64GenContext, REG_RESV_TEMP);
+ else
+ x64Gen_div_reg64Low32(x64GenContext, REG_RESV_TEMP);
+ // result of division is now stored in EAX, move it to result register
+ if( rRegResult != X86_REG_EAX )
+ x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, X86_REG_EAX);
+ // restore EAX / EDX
+ if( rRegResult != X86_REG_RAX )
+ x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]));
+ if( rRegResult != X86_REG_RDX )
+ x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EDX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]));
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED )
+ {
+ x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX);
+ x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX);
+ // mov operand 2 to temp register
+ x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2);
+ // mov operand1 to EAX
+ x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, X86_REG_EAX, rRegOperand1);
+ if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED )
+ {
+ // zero extend EAX to EDX:EAX
+ x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, X86_REG_EDX, X86_REG_EDX);
+ }
+ else
+ {
+ // sign extend EAX to EDX:EAX
+ x64Gen_cdq(x64GenContext);
+ }
+ // multiply
+ if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED )
+ x64Gen_imul_reg64Low32(x64GenContext, REG_RESV_TEMP);
+ else
+ x64Gen_mul_reg64Low32(x64GenContext, REG_RESV_TEMP);
+ // result of multiplication is now stored in EDX:EAX, move it to result register
+ if( rRegResult != X86_REG_EDX )
+ x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, X86_REG_EDX);
+ // restore EAX / EDX
+ if( rRegResult != X86_REG_RAX )
+ x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]));
+ if( rRegResult != X86_REG_RDX )
+ x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EDX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]));
+ }
+ else
+ {
+ cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation);
+ return false;
+ }
+ return true;
+}
+
+bool PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+ auto regR = _reg32(imlInstruction->op_r_r_r_carry.regR);
+ auto regA = _reg32(imlInstruction->op_r_r_r_carry.regA);
+ auto regB = _reg32(imlInstruction->op_r_r_r_carry.regB);
+ auto regCarry = _reg32(imlInstruction->op_r_r_r_carry.regCarry);
+ bool carryRegIsShared = regCarry == regA || regCarry == regB;
+ cemu_assert_debug(regCarry != regR); // two outputs sharing the same register is undefined behavior
+
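+	// when the carry register aliases an input it cannot be pre-zeroed, so it is masked to 0/1 after SETcc instead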
+ switch (imlInstruction->operation)
+ {
+ case PPCREC_IML_OP_ADD:
+ if (regB == regR)
+ std::swap(regB, regA);
+ if (regR != regA)
+ x64GenContext->emitter->MOV_dd(regR, regA);
+ if(!carryRegIsShared)
+ x64GenContext->emitter->XOR_dd(regCarry, regCarry);
+ x64GenContext->emitter->ADD_dd(regR, regB);
+ x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry)); // below condition checks carry flag
+ if(carryRegIsShared)
+ x64GenContext->emitter->AND_di8(regCarry, 1); // clear upper bits
+ break;
+ case PPCREC_IML_OP_ADD_WITH_CARRY:
+ // assumes that carry is already correctly initialized as 0 or 1
+ if (regB == regR)
+ std::swap(regB, regA);
+ if (regR != regA)
+ x64GenContext->emitter->MOV_dd(regR, regA);
+ x64GenContext->emitter->BT_du8(regCarry, 0); // copy carry register to x86 carry flag
+ x64GenContext->emitter->ADC_dd(regR, regB);
+ x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry));
+ break;
+ default:
+ cemu_assert_unimplemented();
+ return false;
+ }
+ return true;
+}
+
+bool PPCRecompilerX64Gen_IsSameCompare(IMLInstruction* imlInstructionA, IMLInstruction* imlInstructionB)
+{
+ if(imlInstructionA->type != imlInstructionB->type)
+ return false;
+ if(imlInstructionA->type == PPCREC_IML_TYPE_COMPARE)
+ return imlInstructionA->op_compare.regA == imlInstructionB->op_compare.regA && imlInstructionA->op_compare.regB == imlInstructionB->op_compare.regB;
+ else if(imlInstructionA->type == PPCREC_IML_TYPE_COMPARE_S32)
+ return imlInstructionA->op_compare_s32.regA == imlInstructionB->op_compare_s32.regA && imlInstructionA->op_compare_s32.immS32 == imlInstructionB->op_compare_s32.immS32;
+ return false;
+}
+
+bool PPCRecompilerX64Gen_imlInstruction_compare_x(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, sint32& extraInstructionsProcessed)
+{
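+	// fuse up to four consecutive compares on the same operands into one CMP followed by a SETcc per result register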
+ extraInstructionsProcessed = 0;
+	boost::container::static_vector<IMLInstruction*, 4> compareInstructions;
+ compareInstructions.push_back(imlInstruction);
+ for(sint32 i=1; i<4; i++)
+ {
+ IMLInstruction* nextIns = x64GenContext->GetNextInstruction(i);
+ if(!nextIns || !PPCRecompilerX64Gen_IsSameCompare(imlInstruction, nextIns))
+ break;
+ compareInstructions.push_back(nextIns);
+ }
+ auto OperandOverlapsWithR = [&](IMLInstruction* ins) -> bool
+ {
+ cemu_assert_debug(ins->type == PPCREC_IML_TYPE_COMPARE || ins->type == PPCREC_IML_TYPE_COMPARE_S32);
+ if(ins->type == PPCREC_IML_TYPE_COMPARE)
+ return _reg32_from_reg8(_reg8(ins->op_compare.regR)) == _reg32(ins->op_compare.regA) || _reg32_from_reg8(_reg8(ins->op_compare.regR)) == _reg32(ins->op_compare.regB);
+ else /* PPCREC_IML_TYPE_COMPARE_S32 */
+ return _reg32_from_reg8(_reg8(ins->op_compare_s32.regR)) == _reg32(ins->op_compare_s32.regA);
+ };
+ auto GetRegR = [](IMLInstruction* insn)
+ {
+ return insn->type == PPCREC_IML_TYPE_COMPARE ? _reg32_from_reg8(_reg8(insn->op_compare.regR)) : _reg32_from_reg8(_reg8(insn->op_compare_s32.regR));
+ };
+ // prefer XOR method for zeroing out registers if possible
+ for(auto& it : compareInstructions)
+ {
+ if(OperandOverlapsWithR(it))
+ continue;
+ auto regR = GetRegR(it);
+ x64GenContext->emitter->XOR_dd(regR, regR); // zero bytes unaffected by SETcc
+ }
+ // emit the compare instruction
+ if(imlInstruction->type == PPCREC_IML_TYPE_COMPARE)
+ {
+ auto regA = _reg32(imlInstruction->op_compare.regA);
+ auto regB = _reg32(imlInstruction->op_compare.regB);
+ x64GenContext->emitter->CMP_dd(regA, regB);
+ }
+ else if(imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32)
+ {
+ auto regA = _reg32(imlInstruction->op_compare_s32.regA);
+ sint32 imm = imlInstruction->op_compare_s32.immS32;
+ x64GenContext->emitter->CMP_di32(regA, imm);
+ }
+ // emit the SETcc instructions
+ for(auto& it : compareInstructions)
+ {
+ auto regR = _reg8(it->op_compare.regR);
+ X86Cond cond = _x86Cond(it->op_compare.cond);
+ if(OperandOverlapsWithR(it))
+ x64GenContext->emitter->MOV_di32(_reg32_from_reg8(regR), 0);
+ x64GenContext->emitter->SETcc_b(cond, regR);
+ }
+ extraInstructionsProcessed = (sint32)compareInstructions.size() - 1;
+ return true;
+}
+
+bool PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment)
+{
+ auto regBool = _reg8(imlInstruction->op_conditional_jump.registerBool);
+ bool mustBeTrue = imlInstruction->op_conditional_jump.mustBeTrue;
+ x64GenContext->emitter->TEST_bb(regBool, regBool);
+ PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
+ x64GenContext->emitter->Jcc_j32(mustBeTrue ? X86_CONDITION_NZ : X86_CONDITION_Z, 0);
+ return true;
+}
+
+void PPCRecompilerX64Gen_imlInstruction_x86_eflags_jcc(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment)
+{
+ X86Cond cond = _x86Cond(imlInstruction->op_x86_eflags_jcc.cond, imlInstruction->op_x86_eflags_jcc.invertedCondition);
+ PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
+ x64GenContext->emitter->Jcc_j32(cond, 0);
+}
+
+bool PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment)
+{
+ PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
+ x64GenContext->emitter->JMP_j32(0);
+ return true;
+}
+
+bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+ auto regR = _reg32(imlInstruction->op_r_r_s32.regR);
+ auto regA = _reg32(imlInstruction->op_r_r_s32.regA);
+ uint32 immS32 = imlInstruction->op_r_r_s32.immS32;
+
+ if( imlInstruction->operation == PPCREC_IML_OP_ADD )
+ {
+ uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32;
+ if(regR != regA)
+ x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA);
+ x64Gen_add_reg64Low32_imm32(x64GenContext, regR, (uint32)immU32);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_SUB)
+ {
+ if (regR != regA)
+ x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA);
+ x64Gen_sub_reg64Low32_imm32(x64GenContext, regR, immS32);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_AND ||
+ imlInstruction->operation == PPCREC_IML_OP_OR ||
+ imlInstruction->operation == PPCREC_IML_OP_XOR)
+ {
+ if (regR != regA)
+ x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA);
+ if (imlInstruction->operation == PPCREC_IML_OP_AND)
+ x64Gen_and_reg64Low32_imm32(x64GenContext, regR, immS32);
+ else if (imlInstruction->operation == PPCREC_IML_OP_OR)
+ x64Gen_or_reg64Low32_imm32(x64GenContext, regR, immS32);
+ else // XOR
+ x64Gen_xor_reg64Low32_imm32(x64GenContext, regR, immS32);
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED )
+ {
+ // registerResult = registerOperand * immS32
+ sint32 immS32 = (uint32)imlInstruction->op_r_r_s32.immS32;
+ x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (sint64)immS32); // todo: Optimize
+ if( regR != regA )
+ x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA);
+ x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, regR, REG_RESV_TEMP);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT ||
+ imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U ||
+ imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
+ {
+ if( regA != regR )
+ x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA);
+ if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
+ x64Gen_shl_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32);
+ else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
+ x64Gen_shr_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32);
+ else // RIGHT_SHIFT_S
+ x64Gen_sar_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32);
+ }
+ else
+ {
+ debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation);
+ return false;
+ }
+ return true;
+}
+
+bool PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+ auto regR = _reg32(imlInstruction->op_r_r_s32_carry.regR);
+ auto regA = _reg32(imlInstruction->op_r_r_s32_carry.regA);
+ sint32 immS32 = imlInstruction->op_r_r_s32_carry.immS32;
+ auto regCarry = _reg32(imlInstruction->op_r_r_s32_carry.regCarry);
+	cemu_assert_debug(regCarry != regR); // we don't allow two different outputs sharing the same register
+
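+	// if the carry register aliases operand A, zeroing it first would destroy the input, so it is initialized after the ADD instead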
+ bool delayCarryInit = regCarry == regA;
+
+ switch (imlInstruction->operation)
+ {
+ case PPCREC_IML_OP_ADD:
+ if(!delayCarryInit)
+ x64GenContext->emitter->XOR_dd(regCarry, regCarry);
+ if (regR != regA)
+ x64GenContext->emitter->MOV_dd(regR, regA);
+ x64GenContext->emitter->ADD_di32(regR, immS32);
+ if(delayCarryInit)
+ x64GenContext->emitter->MOV_di32(regCarry, 0);
+ x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry));
+ break;
+ case PPCREC_IML_OP_ADD_WITH_CARRY:
+ // assumes that carry is already correctly initialized as 0 or 1
+ cemu_assert_debug(regCarry != regR);
+ if (regR != regA)
+ x64GenContext->emitter->MOV_dd(regR, regA);
+ x64GenContext->emitter->BT_du8(regCarry, 0); // copy carry register to x86 carry flag
+ x64GenContext->emitter->ADC_di32(regR, immS32);
+ x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry));
+ break;
+ default:
+ cemu_assert_unimplemented();
+ return false;
+ }
+ return true;
+}
+
+bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+	// some tests (all performed on an i7-4790K)
+ // 1) DEC [mem] + JNS has significantly worse performance than BT + JNC (probably due to additional memory write and direct dependency)
+ // 2) CMP [mem], 0 + JG has about equal (or slightly worse) performance than BT + JNC
+
+ // BT
+ x64Gen_bt_mem8(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative
+ cemu_assert_debug(x64GenContext->currentSegment->GetBranchTaken());
+ PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, x64GenContext->currentSegment->GetBranchTaken());
+ x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0);
+ return true;
+}
+
+void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+ uint32 name = imlInstruction->op_r_name.name;
+ if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::I64)
+ {
+ auto regR = _reg64(imlInstruction->op_r_name.regR);
+ if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32)
+ {
+ x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0));
+ }
+ else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999)
+ {
+ sint32 sprIndex = (name - PPCREC_NAME_SPR0);
+ if (sprIndex == SPR_LR)
+ x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.LR));
+ else if (sprIndex == SPR_CTR)
+ x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.CTR));
+ else if (sprIndex == SPR_XER)
+ x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.XER));
+ else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
+ {
+ sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0);
+ x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, memOffset);
+ }
+ else
+ assert_dbg();
+ }
+ else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
+ {
+ x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY));
+ }
+ else if (name == PPCREC_NAME_XER_CA)
+ {
+ x64Emit_movZX_reg64_mem8(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_ca));
+ }
+ else if (name == PPCREC_NAME_XER_SO)
+ {
+ x64Emit_movZX_reg64_mem8(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_so));
+ }
+ else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
+ {
+ x64Emit_movZX_reg64_mem8(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR));
+ }
+ else if (name == PPCREC_NAME_CPU_MEMRES_EA)
+ {
+ x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr));
+ }
+ else if (name == PPCREC_NAME_CPU_MEMRES_VAL)
+ {
+ x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue));
+ }
+ else
+ assert_dbg();
+ }
+ else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64)
+ {
+ auto regR = _regF64(imlInstruction->op_r_name.regR);
+ if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64))
+ {
+ sint32 regIndex = (name - PPCREC_NAME_FPR_HALF) / 2;
+ sint32 pairIndex = (name - PPCREC_NAME_FPR_HALF) % 2;
+ x64Gen_movsd_xmmReg_memReg64(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + pairIndex * sizeof(double));
+ }
+ else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
+ {
+ x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));
+ }
+ else
+ {
+ cemu_assert_debug(false);
+ }
+ }
+ else
+ DEBUG_BREAK;
+
+}
+
+void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
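+ // NAME_R: store the assigned host register back into the named guest register of the PPCInterpreter_t context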
+ uint32 name = imlInstruction->op_r_name.name;
+
+ if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::I64)
+ {
+ auto regR = _reg64(imlInstruction->op_r_name.regR);
+ if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32)
+ {
+ x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0), regR);
+ }
+ else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999)
+ {
+ uint32 sprIndex = (name - PPCREC_NAME_SPR0);
+ if (sprIndex == SPR_LR)
+ x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.LR), regR);
+ else if (sprIndex == SPR_CTR)
+ x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.CTR), regR);
+ else if (sprIndex == SPR_XER)
+ x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.XER), regR);
+ else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
+ {
+ sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0);
+ x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, memOffset, regR);
+ }
+ else
+ assert_dbg();
+ }
+ else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
+ {
+ x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), regR);
+ }
+ else if (name == PPCREC_NAME_XER_CA)
+ {
+ x64GenContext->emitter->MOV_bb_l(REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg64(regR));
+ }
+ else if (name == PPCREC_NAME_XER_SO)
+ {
+ x64GenContext->emitter->MOV_bb_l(REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg64(regR));
+ }
+ else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
+ {
+ x64GenContext->emitter->MOV_bb_l(REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg64(regR));
+ }
+ else if (name == PPCREC_NAME_CPU_MEMRES_EA)
+ {
+ x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr), regR);
+ }
+ else if (name == PPCREC_NAME_CPU_MEMRES_VAL)
+ {
+ x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue), regR);
+ }
+ else
+ assert_dbg();
+ }
+ else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64)
+ {
+ auto regR = _regF64(imlInstruction->op_r_name.regR);
+ uint32 name = imlInstruction->op_r_name.name;
+ if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64))
+ {
+ sint32 regIndex = (name - PPCREC_NAME_FPR_HALF) / 2;
+ sint32 pairIndex = (name - PPCREC_NAME_FPR_HALF) % 2;
+ x64Gen_movsd_memReg64_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + (pairIndex ? sizeof(double) : 0));
+ }
+ else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
+ {
+ x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));
+ }
+ else
+ {
+ cemu_assert_debug(false);
+ }
+ }
+ else
+ DEBUG_BREAK;
+
+}
+
+uint8* codeMemoryBlock = nullptr;
+sint32 codeMemoryBlockIndex = 0;
+sint32 codeMemoryBlockSize = 0;
+
+std::mutex mtx_allocExecutableMemory;
+
+uint8* PPCRecompilerX86_allocateExecutableMemory(sint32 size)
+{
+ std::lock_guard lck(mtx_allocExecutableMemory);
+ if( codeMemoryBlockIndex+size > codeMemoryBlockSize )
+ {
+ // allocate new block
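+ // the previous block (if any) is never freed; code generated earlier still executes from it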
+ codeMemoryBlockSize = std::max(1024*1024*4, size+1024); // 4MB (or more if the function is larger than 4MB)
+ codeMemoryBlockIndex = 0;
+ codeMemoryBlock = (uint8*)MemMapper::AllocateMemory(nullptr, codeMemoryBlockSize, MemMapper::PAGE_PERMISSION::P_RWX);
+ }
+ uint8* codeMem = codeMemoryBlock + codeMemoryBlockIndex;
+ codeMemoryBlockIndex += size;
+ // pad to 4 byte alignment
+ while (codeMemoryBlockIndex & 3)
+ {
+ codeMemoryBlock[codeMemoryBlockIndex] = 0x90;
+ codeMemoryBlockIndex++;
+ }
+ return codeMem;
+}
+
+bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext)
+{
+ x64GenContext_t x64GenContext{};
+
+ // generate iml instruction code
+ bool codeGenerationFailed = false;
+ for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
+ {
+ x64GenContext.currentSegment = segIt;
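+ // remember where each segment starts in the emitted code; jump relocations are resolved against these offsets further below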
+ segIt->x64Offset = x64GenContext.emitter->GetWriteIndex();
+ for(size_t i = 0; i < segIt->imlList.size(); i++)
+ {
+ x64GenContext.m_currentInstructionEmitIndex = i;
+ IMLInstruction* imlInstruction = segIt->imlList.data() + i;
+
+ if( imlInstruction->type == PPCREC_IML_TYPE_R_NAME )
+ {
+ PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+ }
+ else if( imlInstruction->type == PPCREC_IML_TYPE_NAME_R )
+ {
+ PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+ }
+ else if( imlInstruction->type == PPCREC_IML_TYPE_R_R )
+ {
+ if( PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false )
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32)
+ {
+ if (PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32)
+ {
+ if (PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32_CARRY)
+ {
+ if (PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R)
+ {
+ if (PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R_CARRY)
+ {
+ if (PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
+ codeGenerationFailed = true;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE || imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32)
+ {
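+ // the compare emitter may also consume instructions that directly follow (presumably fusing a compare with its dependent conditional jump), so skip whatever it already handled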
+ sint32 extraInstructionsProcessed;
+ PPCRecompilerX64Gen_imlInstruction_compare_x(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, extraInstructionsProcessed);
+ i += extraInstructionsProcessed;
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
+ {
+ if (PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false)
+ codeGenerationFailed = true;
+ }
+ else if(imlInstruction->type == PPCREC_IML_TYPE_X86_EFLAGS_JCC)
+ {
+ PPCRecompilerX64Gen_imlInstruction_x86_eflags_jcc(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt);
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_JUMP)
+ {
+ if (PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false)
+ codeGenerationFailed = true;
+ }
+ else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK )
+ {
+ PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+ }
+ else if( imlInstruction->type == PPCREC_IML_TYPE_MACRO )
+ {
+ if( PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false )
+ {
+ codeGenerationFailed = true;
+ }
+ }
+ else if( imlInstruction->type == PPCREC_IML_TYPE_LOAD )
+ {
+ if( PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false) == false )
+ {
+ codeGenerationFailed = true;
+ }
+ }
+ else if( imlInstruction->type == PPCREC_IML_TYPE_LOAD_INDEXED )
+ {
+ if( PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true) == false )
+ {
+ codeGenerationFailed = true;
+ }
+ }
+ else if( imlInstruction->type == PPCREC_IML_TYPE_STORE )
+ {
+ if( PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false) == false )
+ {
+ codeGenerationFailed = true;
+ }
+ }
+ else if( imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED )
+ {
+ if( PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true) == false )
+ {
+ codeGenerationFailed = true;
+ }
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
+ {
+ PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_CALL_IMM)
+ {
+ PPCRecompilerX64Gen_imlInstruction_call_imm(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+ }
+ else if( imlInstruction->type == PPCREC_IML_TYPE_NO_OP )
+ {
+ // no op
+ }
+ else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD )
+ {
+ if( PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false) == false )
+ {
+ codeGenerationFailed = true;
+ }
+ }
+ else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED )
+ {
+ if( PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true) == false )
+ {
+ codeGenerationFailed = true;
+ }
+ }
+ else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE )
+ {
+ if( PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false) == false )
+ {
+ codeGenerationFailed = true;
+ }
+ }
+ else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED )
+ {
+ if( PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true) == false )
+ {
+ codeGenerationFailed = true;
+ }
+ }
+ else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R )
+ {
+ PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+ }
+ else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R )
+ {
+ PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+ }
+ else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R )
+ {
+ PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+ }
+ else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R )
+ {
+ PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_COMPARE)
+ {
+ PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
+ }
+ else
+ {
+ debug_printf("PPCRecompiler_generateX64Code(): Unsupported iml type 0x%x\n", imlInstruction->type);
+ assert_dbg();
+ }
+ }
+ }
+ // handle failed code generation
+ if( codeGenerationFailed )
+ {
+ return false;
+ }
+ // allocate executable memory
+ uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes());
+ size_t baseAddress = (size_t)executableMemory;
+ // fix relocs
+ for(auto& relocIt : x64GenContext.relocateOffsetTable2)
+ {
+ // extraInfo holds the destination segment; resolve the branch target from its emitted x64 offset
+ IMLSegment* destSegment = (IMLSegment*)relocIt.extraInfo;
+ uint32 x64Offset = destSegment->x64Offset;
+
+ uint32 relocBase = relocIt.offset;
+ uint8* relocInstruction = x64GenContext.emitter->GetBufferPtr()+relocBase;
+ if( relocInstruction[0] == 0x0F && (relocInstruction[1] >= 0x80 && relocInstruction[1] <= 0x8F) )
+ {
+ // Jcc relativeImm32
+ sint32 distanceNearJump = (sint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 2));
+ if (distanceNearJump >= -128 && distanceNearJump < 127) // fits into a signed 8-bit displacement, use the short encoding
+ {
+ // convert to near Jcc
+ *(uint8*)(relocInstruction + 0) = (uint8)(relocInstruction[1]-0x80 + 0x70);
+ // patch offset
+ *(uint8*)(relocInstruction + 1) = (uint8)distanceNearJump;
+ // fill the now-unused 4 bytes with a NOP so the instruction stream length (and all later offsets) stays unchanged
+ relocInstruction[2] = 0x0F;
+ relocInstruction[3] = 0x1F;
+ relocInstruction[4] = 0x40;
+ relocInstruction[5] = 0x00;
+ }
+ else
+ {
+ // patch offset
+ *(uint32*)(relocInstruction + 2) = (uint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 6));
+ }
+ }
+ else if( relocInstruction[0] == 0xE9 )
+ {
+ // JMP relativeImm32
+ *(uint32*)(relocInstruction+1) = (uint32)((baseAddress+x64Offset)-(baseAddress+relocBase+5));
+ }
+ else
+ assert_dbg();
+ }
+
+ // copy code to executable memory
+ std::span<uint8> codeBuffer = x64GenContext.emitter->GetBuffer();
+ memcpy(executableMemory, codeBuffer.data(), codeBuffer.size_bytes());
+ // set code
+ PPCRecFunction->x86Code = executableMemory;
+ PPCRecFunction->x86Size = codeBuffer.size_bytes();
+ return true;
+}
+
+void PPCRecompilerX64Gen_generateEnterRecompilerCode()
+{
+ x64GenContext_t x64GenContext{};
+
+ // start of recompiler entry function (15 regs)
+ x64Gen_push_reg64(&x64GenContext, X86_REG_RAX);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_RCX);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_RDX);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_RBX);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_RBP);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_RDI);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_RSI);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_R8);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_R9);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_R10);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_R11);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_R12);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_R13);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_R14);
+ x64Gen_push_reg64(&x64GenContext, X86_REG_R15);
+
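+ // the CALL +0 below pushes the address of the following ADD instruction; that value is then advanced so the recompiled function's RET lands on the register-restoring epilogue after the JMP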
+ // 000000007775EF04 | E8 00 00 00 00 call +0x00
+ x64Gen_writeU8(&x64GenContext, 0xE8);
+ x64Gen_writeU8(&x64GenContext, 0x00);
+ x64Gen_writeU8(&x64GenContext, 0x00);
+ x64Gen_writeU8(&x64GenContext, 0x00);
+ x64Gen_writeU8(&x64GenContext, 0x00);
+ //000000007775EF09 | 48 83 04 24 05 add qword ptr ss:[rsp],5
+ x64Gen_writeU8(&x64GenContext, 0x48);
+ x64Gen_writeU8(&x64GenContext, 0x83);
+ x64Gen_writeU8(&x64GenContext, 0x04);
+ x64Gen_writeU8(&x64GenContext, 0x24);
+ uint32 jmpPatchOffset = x64GenContext.emitter->GetWriteIndex();
+ x64Gen_writeU8(&x64GenContext, 0); // imm8 placeholder, patched below so the pushed return address skips past the JMP
+ x64Emit_mov_mem64_reg64(&x64GenContext, X86_REG_RDX, offsetof(PPCInterpreter_t, rspTemp), X86_REG_RSP);
+
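+ // from here on RSP (REG_RESV_HCPU) holds the PPCInterpreter_t pointer; the real stack pointer was saved to hCPU->rspTemp above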
+ // MOV RSP, RDX (ppc interpreter instance)
+ x64Gen_mov_reg64_reg64(&x64GenContext, REG_RESV_HCPU, X86_REG_RDX);
+ // MOV R15, ppcRecompilerInstanceData
+ x64Gen_mov_reg64_imm64(&x64GenContext, REG_RESV_RECDATA, (uint64)ppcRecompilerInstanceData);
+ // MOV R13, memory_base
+ x64Gen_mov_reg64_imm64(&x64GenContext, REG_RESV_MEMBASE, (uint64)memory_base);
+
+ // JMP recFunc
+ x64Gen_jmp_reg64(&x64GenContext, X86_REG_RCX); // call argument 1
+
+ x64GenContext.emitter->GetBuffer()[jmpPatchOffset] = (x64GenContext.emitter->GetWriteIndex() - (jmpPatchOffset - 4));
+
+ //recompilerExit1:
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_R15);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_R14);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_R13);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_R12);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_R11);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_R10);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_R9);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_R8);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_RSI);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_RDI);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_RBP);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_RBX);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_RDX);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_RCX);
+ x64Gen_pop_reg64(&x64GenContext, X86_REG_RAX);
+ // RET
+ x64Gen_ret(&x64GenContext);
+
+ uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes());
+ // copy code to executable memory
+ memcpy(executableMemory, x64GenContext.emitter->GetBuffer().data(), x64GenContext.emitter->GetBuffer().size_bytes());
+ PPCRecompiler_enterRecompilerCode = (void ATTR_MS_ABI (*)(uint64,uint64))executableMemory;
+}
+
+
+void* PPCRecompilerX64Gen_generateLeaveRecompilerCode()
+{
+ x64GenContext_t x64GenContext{};
+
+ // update instruction pointer
+ // LR is in EDX
+ x64Emit_mov_mem32_reg32(&x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer), X86_REG_EDX);
+ // MOV RSP, [hCPU->rspTemp]
+ x64Emit_mov_reg64_mem64(&x64GenContext, X86_REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp));
+ // RET
+ x64Gen_ret(&x64GenContext);
+
+ uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes());
+ // copy code to executable memory
+ memcpy(executableMemory, x64GenContext.emitter->GetBuffer().data(), x64GenContext.emitter->GetBuffer().size_bytes());
+ return executableMemory;
+}
+
+void PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions()
+{
+ PPCRecompilerX64Gen_generateEnterRecompilerCode();
+ PPCRecompiler_leaveRecompilerCode_unvisited = (void ATTR_MS_ABI (*)())PPCRecompilerX64Gen_generateLeaveRecompilerCode();
+ PPCRecompiler_leaveRecompilerCode_visited = (void ATTR_MS_ABI (*)())PPCRecompilerX64Gen_generateLeaveRecompilerCode();
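+ // presumably the caller tells the visited/unvisited exit stubs apart by address, hence the check that the two generated stubs are distinct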
+ cemu_assert_debug(PPCRecompiler_leaveRecompilerCode_unvisited != PPCRecompiler_leaveRecompilerCode_visited);
+}
+
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h
similarity index 81%
rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h
rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h
index 1d37a77e..de415ca9 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h
@@ -1,104 +1,56 @@
-typedef struct
+#include "../PPCRecompiler.h" // todo - get rid of dependency
+
+#include "x86Emitter.h"
+
+struct x64RelocEntry_t
{
+ x64RelocEntry_t(uint32 offset, void* extraInfo) : offset(offset), extraInfo(extraInfo) {};
+
uint32 offset;
- uint8 type;
void* extraInfo;
-}x64RelocEntry_t;
+};
-typedef struct
+struct x64GenContext_t
{
- uint8* codeBuffer;
- sint32 codeBufferIndex;
- sint32 codeBufferSize;
- // cr state
- sint32 activeCRRegister; // current x86 condition flags reflect this cr* register
- sint32 activeCRState; // describes the way in which x86 flags map to the cr register (signed / unsigned)
+ IMLSegment* currentSegment{};
+ x86Assembler64* emitter;
+ sint32 m_currentInstructionEmitIndex;
+
+ x64GenContext_t()
+ {
+ emitter = new x86Assembler64();
+ }
+
+ ~x64GenContext_t()
+ {
+ delete emitter;
+ }
+
+ IMLInstruction* GetNextInstruction(sint32 relativeIndex = 1)
+ {
+ sint32 index = m_currentInstructionEmitIndex + relativeIndex;
+ if(index < 0 || index >= (sint32)currentSegment->imlList.size())
+ return nullptr;
+ return currentSegment->imlList.data() + index;
+ }
+
// relocate offsets
- x64RelocEntry_t* relocateOffsetTable;
- sint32 relocateOffsetTableSize;
- sint32 relocateOffsetTableCount;
-}x64GenContext_t;
-
-// Some of these are defined by winnt.h and gnu headers
-#undef REG_EAX
-#undef REG_ECX
-#undef REG_EDX
-#undef REG_EBX
-#undef REG_ESP
-#undef REG_EBP
-#undef REG_ESI
-#undef REG_EDI
-#undef REG_NONE
-#undef REG_RAX
-#undef REG_RCX
-#undef REG_RDX
-#undef REG_RBX
-#undef REG_RSP
-#undef REG_RBP
-#undef REG_RSI
-#undef REG_RDI
-#undef REG_R8
-#undef REG_R9
-#undef REG_R10
-#undef REG_R11
-#undef REG_R12
-#undef REG_R13
-#undef REG_R14
-#undef REG_R15
-
-#define REG_EAX 0
-#define REG_ECX 1
-#define REG_EDX 2
-#define REG_EBX 3
-#define REG_ESP 4 // reserved for low half of hCPU pointer
-#define REG_EBP 5
-#define REG_ESI 6
-#define REG_EDI 7
-#define REG_NONE -1
-
-#define REG_RAX 0
-#define REG_RCX 1
-#define REG_RDX 2
-#define REG_RBX 3
-#define REG_RSP 4 // reserved for hCPU pointer
-#define REG_RBP 5
-#define REG_RSI 6
-#define REG_RDI 7
-#define REG_R8 8
-#define REG_R9 9
-#define REG_R10 10
-#define REG_R11 11
-#define REG_R12 12
-#define REG_R13 13 // reserved to hold pointer to memory base? (Not decided yet)
-#define REG_R14 14 // reserved as temporary register
-#define REG_R15 15 // reserved for pointer to ppcRecompilerInstanceData
-
-#define REG_AL 0
-#define REG_CL 1
-#define REG_DL 2
-#define REG_BL 3
-#define REG_AH 4
-#define REG_CH 5
-#define REG_DH 6
-#define REG_BH 7
+ std::vector<x64RelocEntry_t> relocateOffsetTable2;
+};
// reserved registers
-#define REG_RESV_TEMP (REG_R14)
-#define REG_RESV_HCPU (REG_RSP)
-#define REG_RESV_MEMBASE (REG_R13)
-#define REG_RESV_RECDATA (REG_R15)
+#define REG_RESV_TEMP (X86_REG_R14)
+#define REG_RESV_HCPU (X86_REG_RSP)
+#define REG_RESV_MEMBASE (X86_REG_R13)
+#define REG_RESV_RECDATA (X86_REG_R15)
// reserved floating-point registers
#define REG_RESV_FPR_TEMP (15)
+#define reg32ToReg16(__x) (__x) // deprecated
-extern sint32 x64Gen_registerMap[12];
-
-#define tempToRealRegister(__x) (x64Gen_registerMap[__x])
-#define tempToRealFPRRegister(__x) (__x)
-#define reg32ToReg16(__x) (__x)
-
+// deprecated condition flags
enum
{
X86_CONDITION_EQUAL, // or zero
@@ -119,36 +71,23 @@ enum
X86_CONDITION_NONE, // no condition, jump always
};
-#define PPCREC_CR_TEMPORARY (8) // never stored
-#define PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC (0) // for signed arithmetic operations (ADD, CMPI)
-#define PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC (1) // for unsigned arithmetic operations (ADD, CMPI)
-#define PPCREC_CR_STATE_TYPE_LOGICAL (2) // for unsigned operations (CMPLI)
-
-#define X86_RELOC_MAKE_RELATIVE (0) // make code imm relative to instruction
-#define X64_RELOC_LINK_TO_PPC (1) // translate from ppc address to x86 offset
-#define X64_RELOC_LINK_TO_SEGMENT (2) // link to beginning of segment
-
-#define PPC_X64_GPR_USABLE_REGISTERS (16-4)
-#define PPC_X64_FPR_USABLE_REGISTERS (16-1) // Use XMM0 - XMM14, XMM15 is the temp register
-
-
-bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext);
-
-void PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext);
+bool PPCRecompiler_generateX64Code(struct PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset);
void PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions();
-void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
-void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
-bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed);
-bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed);
+void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
+void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
+bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed);
+bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed);
-void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
-void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
-void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
-void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
+void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
+void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
+void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
+void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
+
+void PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
// ASM gen
void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v);
@@ -196,9 +135,6 @@ void x64Gen_or_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstReg
void x64Gen_and_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32);
void x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32);
-void x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
-void x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegister64, sint32 memImmS32, sint32 srcRegister);
-
void x64Gen_add_reg64_reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_add_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_add_reg64_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
@@ -207,9 +143,6 @@ void x64Gen_sub_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 des
void x64Gen_sub_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_sub_reg64_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_sub_mem32reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegister, sint32 memImmS32, uint64 immU32);
-void x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
-void x64Gen_adc_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
-void x64Gen_adc_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
void x64Gen_dec_mem32(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint32 memoryImmU32);
void x64Gen_imul_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 operandRegister);
void x64Gen_idiv_reg64Low32(x64GenContext_t* x64GenContext, sint32 operandRegister);
@@ -241,9 +174,7 @@ void x64Gen_not_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister);
void x64Gen_neg_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister);
void x64Gen_cdq(x64GenContext_t* x64GenContext);
-void x64Gen_bswap_reg64(x64GenContext_t* x64GenContext, sint32 destRegister);
void x64Gen_bswap_reg64Lower32bit(x64GenContext_t* x64GenContext, sint32 destRegister);
-void x64Gen_bswap_reg64Lower16bit(x64GenContext_t* x64GenContext, sint32 destRegister);
void x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
void x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
@@ -274,6 +205,7 @@ void x64Gen_movddup_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi
void x64Gen_movhlps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_movsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
+void x64Gen_movsd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
@@ -299,6 +231,7 @@ void x64Gen_andps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegist
void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32);
void x64Gen_cvttpd2dq_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc);
+void x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc);
void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
@@ -329,4 +262,8 @@ void x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext_t* x64G
void x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
-void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
\ No newline at end of file
+void x64Gen_shrx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
+void x64Gen_sarx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
+void x64Gen_sarx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
+void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
+void x64Gen_shlx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64AVX.cpp
similarity index 92%
rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp
rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64AVX.cpp
index 619c3985..b0ef8640 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64AVX.cpp
@@ -1,5 +1,4 @@
-#include "PPCRecompiler.h"
-#include "PPCRecompilerX64.h"
+#include "BackendX64.h"
void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
@@ -21,11 +20,10 @@ void _x64Gen_vex128_nds(x64GenContext_t* x64GenContext, uint8 opcodeMap, uint8 a
x64Gen_writeU8(x64GenContext, opcode);
}
-#define VEX_PP_0F 0 // guessed
+#define VEX_PP_0F 0
#define VEX_PP_66_0F 1
-#define VEX_PP_F3_0F 2 // guessed
-#define VEX_PP_F2_0F 3 // guessed
-
+#define VEX_PP_F3_0F 2
+#define VEX_PP_F2_0F 3
void x64Gen_avx_VPUNPCKHQDQ_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB)
{
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp
similarity index 67%
rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp
rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp
index 5a71e93d..bbb707e0 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp
@@ -1,5 +1,4 @@
-#include "PPCRecompiler.h"
-#include "PPCRecompilerX64.h"
+#include "BackendX64.h"
void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
@@ -69,6 +68,34 @@ void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 regist
x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7));
}
+void x64Gen_shrx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
+{
+ x64Gen_writeU8(x64GenContext, 0xC4);
+ x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 0x20 : 0));
+ x64Gen_writeU8(x64GenContext, 0x7B - registerB * 8);
+ x64Gen_writeU8(x64GenContext, 0xF7);
+ x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7));
+}
+
+void x64Gen_sarx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
+{
+ // SARX reg64, reg64, reg64
+ x64Gen_writeU8(x64GenContext, 0xC4);
+ x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 0x20 : 0));
+ x64Gen_writeU8(x64GenContext, 0xFA - registerB * 8);
+ x64Gen_writeU8(x64GenContext, 0xF7);
+ x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7));
+}
+
+void x64Gen_sarx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
+{
+ x64Gen_writeU8(x64GenContext, 0xC4);
+ x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 0x20 : 0));
+ x64Gen_writeU8(x64GenContext, 0x7A - registerB * 8);
+ x64Gen_writeU8(x64GenContext, 0xF7);
+ x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7));
+}
+
void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
{
// SHLX reg64, reg64, reg64
@@ -77,4 +104,13 @@ void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 regist
x64Gen_writeU8(x64GenContext, 0xF9 - registerB * 8);
x64Gen_writeU8(x64GenContext, 0xF7);
x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7));
+}
+
+void x64Gen_shlx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
+{
+ x64Gen_writeU8(x64GenContext, 0xC4);
+ x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 0x20 : 0));
+ x64Gen_writeU8(x64GenContext, 0x79 - registerB * 8);
+ x64Gen_writeU8(x64GenContext, 0xF7);
+ x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7));
}
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp
new file mode 100644
index 00000000..6a8b1b97
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp
@@ -0,0 +1,469 @@
+#include "../PPCRecompiler.h"
+#include "../IML/IML.h"
+#include "BackendX64.h"
+#include "Common/cpu_features.h"
+
+uint32 _regF64(IMLReg physReg);
+
+uint32 _regI32(IMLReg r)
+{
+ cemu_assert_debug(r.GetRegFormat() == IMLRegFormat::I32);
+ return (uint32)r.GetRegID();
+}
+
+static x86Assembler64::GPR32 _reg32(sint8 physRegId)
+{
+ return (x86Assembler64::GPR32)physRegId;
+}
+
+static x86Assembler64::GPR8_REX _reg8(IMLReg r)
+{
+ cemu_assert_debug(r.GetRegFormat() == IMLRegFormat::I32); // currently bool regs are implemented as 32bit registers
+ return (x86Assembler64::GPR8_REX)r.GetRegID();
+}
+
+static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId)
+{
+ return (x86Assembler64::GPR32)regId;
+}
+
+static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId)
+{
+ return (x86Assembler64::GPR8_REX)regId;
+}
+
+// load from memory
+bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
+{
+ sint32 realRegisterXMM = _regF64(imlInstruction->op_storeLoad.registerData);
+ sint32 realRegisterMem = _regI32(imlInstruction->op_storeLoad.registerMem);
+ sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
+ if( indexed )
+ realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2);
+ uint8 mode = imlInstruction->op_storeLoad.mode;
+
+ if( mode == PPCREC_FPR_LD_MODE_SINGLE )
+ {
+ // load byte swapped single into temporary FPR
+ if( indexed )
+ {
+ x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
+ if(g_CPUFeatures.x86.movbe)
+ x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
+ else
+ x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
+ }
+ else
+ {
+ if(g_CPUFeatures.x86.movbe)
+ x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
+ else
+ x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
+ }
+ if(g_CPUFeatures.x86.movbe == false )
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
+ x64Gen_movd_xmmReg_reg64Low32(x64GenContext, realRegisterXMM, REG_RESV_TEMP);
+
+ if (imlInstruction->op_storeLoad.flags2.notExpanded)
+ {
+ // leave value as single
+ }
+ else
+ {
+ x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, realRegisterXMM);
+ }
+ }
+ else if( mode == PPCREC_FPR_LD_MODE_DOUBLE )
+ {
+ if( g_CPUFeatures.x86.avx )
+ {
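+ // fast path: load the big-endian double as one 64-bit value, byte-swap it in a GPR and move it into the XMM register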
+ if( indexed )
+ {
+ // calculate offset
+ x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
+ // load value
+ x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0);
+ x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP);
+ x64Gen_movq_xmmReg_reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP);
+ x64Gen_movsd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_FPR_TEMP);
+ }
+ else
+ {
+ x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+0);
+ x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP);
+ x64Gen_movq_xmmReg_reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP);
+ x64Gen_movsd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_FPR_TEMP);
+ }
+ }
+ else
+ {
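+ // fallback: byte-swap the two 32-bit halves through hCPU->temporaryFPR, then load the assembled double from there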
+ if( indexed )
+ {
+ // calculate offset
+ x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
+ // load double low part to temporaryFPR
+ x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0);
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
+ x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP);
+ // calculate offset again
+ x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
+ // load double high part to temporaryFPR
+ x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+4);
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
+ x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP);
+ // load double from temporaryFPR
+ x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR));
+ }
+ else
+ {
+ // load double low part to temporaryFPR
+ x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+0);
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
+ x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP);
+ // load double high part to temporaryFPR
+ x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+4);
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
+ x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP);
+ // load double from temporaryFPR
+ x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR));
+ }
+ }
+ }
+ else
+ {
+ return false;
+ }
+ return true;
+}
+
+// store to memory
+bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
+{
+ sint32 realRegisterXMM = _regF64(imlInstruction->op_storeLoad.registerData);
+ sint32 realRegisterMem = _regI32(imlInstruction->op_storeLoad.registerMem);
+ sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
+ if( indexed )
+ realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2);
+ uint8 mode = imlInstruction->op_storeLoad.mode;
+ if( mode == PPCREC_FPR_ST_MODE_SINGLE )
+ {
+ if (imlInstruction->op_storeLoad.flags2.notExpanded)
+ {
+ // value is already in single format
+ x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM);
+ }
+ else
+ {
+ x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, realRegisterXMM);
+ x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP);
+ }
+ if(g_CPUFeatures.x86.movbe == false )
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
+ if( indexed )
+ {
+ if( realRegisterMem == realRegisterMem2 )
+ assert_dbg();
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ }
+ if(g_CPUFeatures.x86.movbe)
+ x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
+ else
+ x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
+ if( indexed )
+ {
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ }
+ }
+ else if( mode == PPCREC_FPR_ST_MODE_DOUBLE )
+ {
+ if( indexed )
+ {
+ if( realRegisterMem == realRegisterMem2 )
+ assert_dbg();
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ }
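+ // spill the double to hCPU->temporaryFPR, then byte-swap each 32-bit half into guest memory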
+ x64Gen_movsd_memReg64_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR));
+ // store double low part
+ x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+0);
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
+ x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+4, REG_RESV_TEMP);
+ // store double high part
+ x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+4);
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
+ x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+0, REG_RESV_TEMP);
+ if( indexed )
+ {
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ }
+ }
+ else if( mode == PPCREC_FPR_ST_MODE_UI32_FROM_PS0 )
+ {
+ x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM);
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
+ if( indexed )
+ {
+ cemu_assert_debug(realRegisterMem != realRegisterMem2);
+ x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
+ x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
+ }
+ else
+ {
+ x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
+ }
+ }
+ else
+ {
+ debug_printf("PPCRecompilerX64Gen_imlInstruction_fpr_store(): Unsupported mode %d\n", mode);
+ return false;
+ }
+ return true;
+}
+
+// FPR op FPR
+void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+ if( imlInstruction->operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT )
+ {
+ uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regR);
+ uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regA);
+ x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, regGpr, regFpr);
+ return;
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT )
+ {
+ uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regR);
+ uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regA);
+ x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext, regFpr, regGpr);
+ return;
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
+ {
+ cemu_assert_debug(imlInstruction->op_fpr_r_r.regR.GetRegFormat() == IMLRegFormat::F64); // assuming target is always F64 for now
+ cemu_assert_debug(imlInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::I32); // supporting only 32bit floats as input for now
+ // exact operation depends on size of types. Floats are automatically promoted to double if the target is F64
+ uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regR);
+ if (imlInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::I32)
+ {
+ uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regA);
+ x64Gen_movq_xmmReg_reg64(x64GenContext, regFpr, regGpr); // using reg32 as reg64 param here is ok. We'll refactor later
+ // float to double
+ x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regFpr, regFpr);
+ }
+ else
+ {
+ cemu_assert_unimplemented();
+ }
+ return;
+ }
+
+ uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR);
+ uint32 regA = _regF64(imlInstruction->op_fpr_r_r.regA);
+ if( imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN )
+ {
+ x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA);
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY )
+ {
+ x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regA);
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE )
+ {
+ x64Gen_divsd_xmmReg_xmmReg(x64GenContext, regR, regA);
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD )
+ {
+ x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regA);
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB )
+ {
+ x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regA);
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_FCTIWZ )
+ {
+ x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, regA);
+ x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
+ // move to FPR register
+ x64Gen_movq_xmmReg_reg64(x64GenContext, regR, REG_RESV_TEMP);
+ }
+ else
+ {
+ assert_dbg();
+ }
+}
+
+/*
+ * FPR = op (fprA, fprB)
+ */
+void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+ uint32 regR = _regF64(imlInstruction->op_fpr_r_r_r.regR);
+ uint32 regA = _regF64(imlInstruction->op_fpr_r_r_r.regA);
+ uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r.regB);
+
+ if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY)
+ {
+ if (regR == regA)
+ {
+ x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regB);
+ }
+ else if (regR == regB)
+ {
+ x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regA);
+ }
+ else
+ {
+ x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA);
+ x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regB);
+ }
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD)
+ {
+ // todo: Use AVX 3-operand VADDSD if available
+ if (regR == regA)
+ {
+ x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regB);
+ }
+ else if (regR == regB)
+ {
+ x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regA);
+ }
+ else
+ {
+ x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA);
+ x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regB);
+ }
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB )
+ {
+ if( regR == regA )
+ {
+ x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regB);
+ }
+ else if( regR == regB )
+ {
+ x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
+ x64Gen_subsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB);
+ x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
+ }
+ else
+ {
+ x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA);
+ x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regB);
+ }
+ }
+ else
+ assert_dbg();
+}
+
+/*
+ * FPR = op (fprA, fprB, fprC)
+ */
+void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+ uint32 regR = _regF64(imlInstruction->op_fpr_r_r_r_r.regR);
+ uint32 regA = _regF64(imlInstruction->op_fpr_r_r_r_r.regA);
+ uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r_r.regB);
+ uint32 regC = _regF64(imlInstruction->op_fpr_r_r_r_r.regC);
+
+ if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT )
+ {
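+ // fsel-style select: regR = (regA >= 0.0) ? regC : regB; an unordered comparison (NaN) also selects regB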
+ x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
+ sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
+ x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0);
+ // select C
+ x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regC);
+ sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex();
+ x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
+ // select B
+ PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
+ x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regB);
+ // end
+ PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
+ }
+ else
+ assert_dbg();
+}
+
+void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+ uint32 regR = _regF64(imlInstruction->op_fpr_r.regR);
+
+ if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE )
+ {
+ x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom));
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_LOAD_ONE )
+ {
+ x64Gen_movsd_xmmReg_memReg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble1_1));
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS )
+ {
+ x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom));
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS )
+ {
+ x64Gen_orps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom));
+ }
+ else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM )
+ {
+ // convert to 32bit single
+ x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, regR, regR);
+ // convert back to 64bit double
+ x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR);
+ }
+ else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64)
+ {
+ // convert bottom to 64bit double
+ x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR);
+ }
+ else
+ {
+ cemu_assert_unimplemented();
+ }
+}
+
+void PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
+{
+ auto regR = _reg8(imlInstruction->op_fpr_compare.regR);
+ auto regA = _regF64(imlInstruction->op_fpr_compare.regA);
+ auto regB = _regF64(imlInstruction->op_fpr_compare.regB);
+
+ x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR));
+ x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, regA, regB);
+
+ if (imlInstruction->op_fpr_compare.cond == IMLCondition::UNORDERED_GT)
+ {
+ // GT case can be covered with a single SETnbe which checks CF==0 && ZF==0 (unordered sets both)
+ x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_NBE, regR);
+ return;
+ }
+ else if (imlInstruction->op_fpr_compare.cond == IMLCondition::UNORDERED_U)
+ {
+ // unordered case can be checked via PF
+ x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_PE, regR);
+ return;
+ }
+
+ // remember unordered state
+ auto regTmp = _reg32_from_reg8(_reg32(REG_RESV_TEMP));
+ x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_PO, regTmp); // by reversing the parity we can avoid having to XOR the value for masking the LT/EQ conditions
+
+ X86Cond x86Cond;
+ switch (imlInstruction->op_fpr_compare.cond)
+ {
+ case IMLCondition::UNORDERED_LT:
+ x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_B, regR);
+ break;
+ case IMLCondition::UNORDERED_EQ:
+ x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_Z, regR);
+ break;
+ default:
+ cemu_assert_unimplemented();
+ }
+ x64GenContext->emitter->AND_bb(_reg8_from_reg32(regR), _reg8_from_reg32(regTmp)); // if unordered (PF=1) then force LT/GT/EQ to zero
+}
\ No newline at end of file
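For reference, the UCOMISD flag mapping that the compare lowering above relies on, plus a plain C++ statement of what the masked LT path ends up computing (a sketch, not the emitter code):

// UCOMISD a, b:
//   unordered (NaN)  -> ZF=1, PF=1, CF=1
//   a <  b           -> ZF=0, PF=0, CF=1
//   a == b           -> ZF=1, PF=0, CF=0
//   a >  b           -> ZF=0, PF=0, CF=0
// SETNBE (CF==0 && ZF==0) therefore already excludes NaN for the GT case,
// SETPE detects the unordered case directly, and LT/EQ use SETB/SETZ ANDed
// with the SETPO byte so NaN inputs force the result byte to 0.
static bool fpCompareLT(double a, double b)
{
    return a < b; // ordered less-than: false whenever either operand is NaN
}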
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp
similarity index 90%
rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp
rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp
index 19327f46..efe929d0 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp
@@ -1,62 +1,31 @@
-#include "PPCRecompiler.h"
-#include "PPCRecompilerIml.h"
-#include "PPCRecompilerX64.h"
+#include "BackendX64.h"
// x86/x64 extension opcodes that could be useful:
// ANDN
// mulx, rorx, sarx, shlx, shrx
// PDEP, PEXT
-void x64Gen_checkBuffer(x64GenContext_t* x64GenContext)
-{
- // todo
-}
-
void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v)
{
- if( x64GenContext->codeBufferIndex+1 > x64GenContext->codeBufferSize )
- {
- x64GenContext->codeBufferSize *= 2;
- x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize);
- }
- *(uint8*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v;
- x64GenContext->codeBufferIndex++;
+ x64GenContext->emitter->_emitU8(v);
}
void x64Gen_writeU16(x64GenContext_t* x64GenContext, uint32 v)
{
- if( x64GenContext->codeBufferIndex+2 > x64GenContext->codeBufferSize )
- {
- x64GenContext->codeBufferSize *= 2;
- x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize);
- }
- *(uint16*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v;
- x64GenContext->codeBufferIndex += 2;
+ x64GenContext->emitter->_emitU16(v);
}
void x64Gen_writeU32(x64GenContext_t* x64GenContext, uint32 v)
{
- if( x64GenContext->codeBufferIndex+4 > x64GenContext->codeBufferSize )
- {
- x64GenContext->codeBufferSize *= 2;
- x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize);
- }
- *(uint32*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v;
- x64GenContext->codeBufferIndex += 4;
+ x64GenContext->emitter->_emitU32(v);
}
void x64Gen_writeU64(x64GenContext_t* x64GenContext, uint64 v)
{
- if( x64GenContext->codeBufferIndex+8 > x64GenContext->codeBufferSize )
- {
- x64GenContext->codeBufferSize *= 2;
- x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize);
- }
- *(uint64*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v;
- x64GenContext->codeBufferIndex += 8;
+ x64GenContext->emitter->_emitU64(v);
}
-#include "x64Emit.hpp"
+#include "X64Emit.hpp"
void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32)
{
@@ -67,7 +36,7 @@ void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataReg
forceUseOffset = true;
}
- if (memRegisterB64 == REG_NONE)
+ if (memRegisterB64 == X86_REG_NONE)
{
// memRegisterA64 + memImmS32
uint8 modRM = (dataRegister & 7) * 8 + (memRegisterA64 & 7);
@@ -352,7 +321,7 @@ void x64Gen_mov_mem32Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis
void x64Gen_mov_mem64Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 memImmU32, uint32 dataImmU32)
{
	// MOV QWORD [<memReg>+<imm>], dataImmU32
- if( memRegister == REG_R14 )
+ if( memRegister == X86_REG_R14 )
{
sint32 memImmS32 = (sint32)memImmU32;
if( memImmS32 == 0 )
@@ -384,7 +353,7 @@ void x64Gen_mov_mem64Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis
void x64Gen_mov_mem8Reg64_imm8(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 memImmU32, uint8 dataImmU8)
{
	// MOV BYTE [<memReg>+<imm>], dataImmU8
- if( memRegister == REG_RSP )
+ if( memRegister == X86_REG_RSP )
{
sint32 memImmS32 = (sint32)memImmU32;
if( memImmS32 >= -128 && memImmS32 <= 127 )
@@ -625,7 +594,7 @@ void _x64_op_reg64Low_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegist
if (memRegister64 >= 8)
x64Gen_writeU8(x64GenContext, 0x41);
x64Gen_writeU8(x64GenContext, opByte);
- _x64Gen_writeMODRMDeprecated(x64GenContext, dstRegister, memRegister64, REG_NONE, memImmS32);
+ _x64Gen_writeMODRMDeprecated(x64GenContext, dstRegister, memRegister64, X86_REG_NONE, memImmS32);
}
void x64Gen_or_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32)
@@ -643,40 +612,6 @@ void x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext_t* x64GenContext, sint32 dstRe
_x64_op_reg64Low_mem8Reg64(x64GenContext, dstRegister, memRegister64, memImmS32, 0x88);
}
-void x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister)
-{
- // LOCK CMPXCHG DWORD [ + + ], (low dword)
- x64Gen_writeU8(x64GenContext, 0xF0); // LOCK prefix
-
- if( srcRegister >= 8 || memRegisterA64 >= 8|| memRegisterB64 >= 8 )
- x64Gen_writeU8(x64GenContext, 0x40+((srcRegister>=8)?4:0)+((memRegisterA64>=8)?1:0)+((memRegisterB64>=8)?2:0));
-
- x64Gen_writeU8(x64GenContext, 0x0F);
- x64Gen_writeU8(x64GenContext, 0xB1);
-
- _x64Gen_writeMODRMDeprecated(x64GenContext, srcRegister, memRegisterA64, memRegisterB64, memImmS32);
-}
-
-void x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegister64, sint32 memImmS32, sint32 srcRegister)
-{
- // LOCK CMPXCHG DWORD [ + ], (low dword)
- x64Gen_writeU8(x64GenContext, 0xF0); // LOCK prefix
-
- if( srcRegister >= 8 || memRegister64 >= 8 )
- x64Gen_writeU8(x64GenContext, 0x40+((srcRegister>=8)?4:0)+((memRegister64>=8)?1:0));
-
- x64Gen_writeU8(x64GenContext, 0x0F);
- x64Gen_writeU8(x64GenContext, 0xB1);
-
- if( memImmS32 == 0 )
- {
- x64Gen_writeU8(x64GenContext, 0x45+(srcRegister&7)*8);
- x64Gen_writeU8(x64GenContext, 0x00);
- }
- else
- assert_dbg();
-}
-
void x64Gen_add_reg64_reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister)
{
	// ADD <destReg>, <srcReg>
@@ -732,7 +667,7 @@ void x64Gen_add_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis
}
else
{
- if( srcRegister == REG_RAX )
+ if( srcRegister == X86_REG_RAX )
{
// special EAX short form
x64Gen_writeU8(x64GenContext, 0x05);
@@ -772,7 +707,7 @@ void x64Gen_sub_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis
}
else
{
- if( srcRegister == REG_RAX )
+ if( srcRegister == X86_REG_RAX )
{
// special EAX short form
x64Gen_writeU8(x64GenContext, 0x2D);
@@ -811,7 +746,7 @@ void x64Gen_sub_mem32reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis
{
	// SUB DWORD [<memReg>+<imm>], <imm32>
sint32 immS32 = (sint32)immU32;
- if( memRegister == REG_RSP )
+ if( memRegister == X86_REG_RSP )
{
if( memImmS32 >= 128 )
{
@@ -843,64 +778,11 @@ void x64Gen_sub_mem32reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis
}
}
-void x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister)
-{
- // SBB ,
- if( destRegister >= 8 && srcRegister >= 8 )
- x64Gen_writeU8(x64GenContext, 0x45);
- else if( srcRegister >= 8 )
- x64Gen_writeU8(x64GenContext, 0x44);
- else if( destRegister >= 8 )
- x64Gen_writeU8(x64GenContext, 0x41);
- x64Gen_writeU8(x64GenContext, 0x19);
- x64Gen_writeU8(x64GenContext, 0xC0+(srcRegister&7)*8+(destRegister&7));
-}
-
-void x64Gen_adc_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister)
-{
- // ADC ,
- if( destRegister >= 8 && srcRegister >= 8 )
- x64Gen_writeU8(x64GenContext, 0x45);
- else if( srcRegister >= 8 )
- x64Gen_writeU8(x64GenContext, 0x44);
- else if( destRegister >= 8 )
- x64Gen_writeU8(x64GenContext, 0x41);
- x64Gen_writeU8(x64GenContext, 0x11);
- x64Gen_writeU8(x64GenContext, 0xC0+(srcRegister&7)*8+(destRegister&7));
-}
-
-void x64Gen_adc_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32)
-{
- sint32 immS32 = (sint32)immU32;
- if( srcRegister >= 8 )
- x64Gen_writeU8(x64GenContext, 0x41);
- if( immS32 >= -128 && immS32 <= 127 )
- {
- x64Gen_writeU8(x64GenContext, 0x83);
- x64Gen_writeU8(x64GenContext, 0xD0+(srcRegister&7));
- x64Gen_writeU8(x64GenContext, (uint8)immS32);
- }
- else
- {
- if( srcRegister == REG_RAX )
- {
- // special EAX short form
- x64Gen_writeU8(x64GenContext, 0x15);
- }
- else
- {
- x64Gen_writeU8(x64GenContext, 0x81);
- x64Gen_writeU8(x64GenContext, 0xD0+(srcRegister&7));
- }
- x64Gen_writeU32(x64GenContext, immU32);
- }
-}
-
void x64Gen_dec_mem32(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint32 memoryImmU32)
{
	// DEC dword [<memReg64>+imm]
sint32 memoryImmS32 = (sint32)memoryImmU32;
- if (memoryRegister != REG_RSP)
+ if (memoryRegister != X86_REG_RSP)
assert_dbg(); // not supported yet
if (memoryImmS32 >= -128 && memoryImmS32 <= 127)
{
@@ -981,7 +863,7 @@ void x64Gen_and_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis
}
else
{
- if( srcRegister == REG_RAX )
+ if( srcRegister == X86_REG_RAX )
{
// special EAX short form
x64Gen_writeU8(x64GenContext, 0x25);
@@ -1026,7 +908,7 @@ void x64Gen_test_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegi
sint32 immS32 = (sint32)immU32;
if( srcRegister >= 8 )
x64Gen_writeU8(x64GenContext, 0x41);
- if( srcRegister == REG_RAX )
+ if( srcRegister == X86_REG_RAX )
{
// special EAX short form
x64Gen_writeU8(x64GenContext, 0xA9);
@@ -1052,7 +934,7 @@ void x64Gen_cmp_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis
}
else
{
- if( srcRegister == REG_RAX )
+ if( srcRegister == X86_REG_RAX )
{
// special RAX short form
x64Gen_writeU8(x64GenContext, 0x3D);
@@ -1082,7 +964,7 @@ void x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 des
void x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 memRegister, sint32 memImmS32)
{
	// CMP <destReg32>, DWORD [<memReg>+<imm>]
- if( memRegister == REG_RSP )
+ if( memRegister == X86_REG_RSP )
{
if( memImmS32 >= -128 && memImmS32 <= 127 )
assert_dbg(); // todo -> Shorter instruction form
@@ -1112,7 +994,7 @@ void x64Gen_or_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegist
}
else
{
- if( srcRegister == REG_RAX )
+ if( srcRegister == X86_REG_RAX )
{
// special EAX short form
x64Gen_writeU8(x64GenContext, 0x0D);
@@ -1172,7 +1054,7 @@ void x64Gen_xor_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis
}
else
{
- if( srcRegister == REG_RAX )
+ if( srcRegister == X86_REG_RAX )
{
// special EAX short form
x64Gen_writeU8(x64GenContext, 0x35);
@@ -1326,16 +1208,6 @@ void x64Gen_cdq(x64GenContext_t* x64GenContext)
x64Gen_writeU8(x64GenContext, 0x99);
}
-void x64Gen_bswap_reg64(x64GenContext_t* x64GenContext, sint32 destRegister)
-{
- if( destRegister >= 8 )
- x64Gen_writeU8(x64GenContext, 0x41|8);
- else
- x64Gen_writeU8(x64GenContext, 0x40|8);
- x64Gen_writeU8(x64GenContext, 0x0F);
- x64Gen_writeU8(x64GenContext, 0xC8+(destRegister&7));
-}
-
void x64Gen_bswap_reg64Lower32bit(x64GenContext_t* x64GenContext, sint32 destRegister)
{
if( destRegister >= 8 )
@@ -1344,16 +1216,6 @@ void x64Gen_bswap_reg64Lower32bit(x64GenContext_t* x64GenContext, sint32 destReg
x64Gen_writeU8(x64GenContext, 0xC8+(destRegister&7));
}
-void x64Gen_bswap_reg64Lower16bit(x64GenContext_t* x64GenContext, sint32 destRegister)
-{
- assert_dbg(); // do not use this instruction, it's result is always undefined. Instead use ROL , 8
- //x64Gen_writeU8(x64GenContext, 0x66);
- //if( destRegister >= 8 )
- // x64Gen_writeU8(x64GenContext, 0x41);
- //x64Gen_writeU8(x64GenContext, 0x0F);
- //x64Gen_writeU8(x64GenContext, 0xC8+(destRegister&7));
-}
-
void x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister)
{
// SSE4
@@ -1388,7 +1250,7 @@ void x64Gen_setcc_mem8(x64GenContext_t* x64GenContext, sint32 conditionType, sin
{
	// SETcc [<memReg>+imm]
sint32 memoryImmS32 = (sint32)memoryImmU32;
- if( memoryRegister != REG_RSP )
+ if( memoryRegister != X86_REG_RSP )
assert_dbg(); // not supported
if( memoryRegister >= 8 )
assert_dbg(); // not supported
@@ -1627,7 +1489,7 @@ void x64Gen_bt_mem8(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint3
{
	// BT [<memReg>+imm], bitIndex (bit test)
sint32 memoryImmS32 = (sint32)memoryImmU32;
- if( memoryRegister != REG_RSP )
+ if( memoryRegister != X86_REG_RSP )
assert_dbg(); // not supported yet
if( memoryImmS32 >= -128 && memoryImmS32 <= 127 )
{
@@ -1662,7 +1524,7 @@ void x64Gen_jmp_imm32(x64GenContext_t* x64GenContext, uint32 destImm32)
void x64Gen_jmp_memReg64(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 immU32)
{
- if( memRegister == REG_NONE )
+ if( memRegister == X86_REG_NONE )
{
assert_dbg();
}
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp
similarity index 92%
rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp
rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp
index 92289d68..4bbcc025 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp
@@ -1,6 +1,4 @@
-#include "PPCRecompiler.h"
-#include "PPCRecompilerIml.h"
-#include "PPCRecompilerX64.h"
+#include "BackendX64.h"
void x64Gen_genSSEVEXPrefix2(x64GenContext_t* x64GenContext, sint32 xmmRegister1, sint32 xmmRegister2, bool use64BitMode)
{
@@ -44,7 +42,7 @@ void x64Gen_movupd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRe
// SSE2
// move two doubles from memory into xmm register
	// MOVUPD <xmmReg>, [<memReg>+<imm>]
- if( memRegister == REG_ESP )
+ if( memRegister == X86_REG_ESP )
{
// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
// 66 0F 10 84 E4 23 01 00 00
@@ -56,7 +54,7 @@ void x64Gen_movupd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRe
x64Gen_writeU8(x64GenContext, 0xE4);
x64Gen_writeU32(x64GenContext, memImmU32);
}
- else if( memRegister == REG_NONE )
+ else if( memRegister == X86_REG_NONE )
{
assert_dbg();
//x64Gen_writeU8(x64GenContext, 0x66);
@@ -76,7 +74,7 @@ void x64Gen_movupd_memReg128_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRe
// SSE2
	// move two doubles from xmm register into memory
	// MOVUPD [<memReg>+<imm>], <xmmReg>
- if( memRegister == REG_ESP )
+ if( memRegister == X86_REG_ESP )
{
// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
x64Gen_writeU8(x64GenContext, 0x66);
@@ -87,7 +85,7 @@ void x64Gen_movupd_memReg128_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRe
x64Gen_writeU8(x64GenContext, 0xE4);
x64Gen_writeU32(x64GenContext, memImmU32);
}
- else if( memRegister == REG_NONE )
+ else if( memRegister == X86_REG_NONE )
{
assert_dbg();
//x64Gen_writeU8(x64GenContext, 0x66);
@@ -106,7 +104,7 @@ void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRe
{
// SSE3
// move one double from memory into lower and upper half of a xmm register
- if( memRegister == REG_RSP )
+ if( memRegister == X86_REG_RSP )
{
	// MOVDDUP <xmmReg>, [<memReg>+<imm>]
// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
@@ -119,7 +117,7 @@ void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRe
x64Gen_writeU8(x64GenContext, 0xE4);
x64Gen_writeU32(x64GenContext, memImmU32);
}
- else if( memRegister == REG_R15 )
+ else if( memRegister == X86_REG_R15 )
{
	// MOVDDUP <xmmReg>, [<memReg>+<imm>]
// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
@@ -131,7 +129,7 @@ void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRe
x64Gen_writeU8(x64GenContext, 0x87+(xmmRegister&7)*8);
x64Gen_writeU32(x64GenContext, memImmU32);
}
- else if( memRegister == REG_NONE )
+ else if( memRegister == X86_REG_NONE )
{
	// MOVDDUP <xmmReg>, [<imm>]
// 36 F2 0F 12 05 - 00 00 00 00
@@ -185,7 +183,7 @@ void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi
{
// SSE2
// move lower 64bits (double) of xmm register to memory location
- if( memRegister == REG_NONE )
+ if( memRegister == X86_REG_NONE )
{
	// MOVSD [<imm>], <xmmReg>
// F2 0F 11 05 - 45 23 01 00
@@ -197,7 +195,7 @@ void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi
//x64Gen_writeU8(x64GenContext, 0x05+xmmRegister*8);
//x64Gen_writeU32(x64GenContext, memImmU32);
}
- else if( memRegister == REG_RSP )
+ else if( memRegister == X86_REG_RSP )
{
	// MOVSD [RSP+<imm>], <xmmReg>
// F2 0F 11 84 24 - 33 22 11 00
@@ -215,11 +213,42 @@ void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi
}
}
+void x64Gen_movsd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
+{
+ // SSE2
+ if( memRegister == X86_REG_RSP )
+ {
+ // MOVSD <xmmReg>, [RSP+<imm>]
+ x64Gen_writeU8(x64GenContext, 0xF2);
+ x64Gen_genSSEVEXPrefix2(x64GenContext, 0, xmmRegister, false);
+ x64Gen_writeU8(x64GenContext, 0x0F);
+ x64Gen_writeU8(x64GenContext, 0x10);
+ x64Gen_writeU8(x64GenContext, 0x84+(xmmRegister&7)*8);
+ x64Gen_writeU8(x64GenContext, 0x24);
+ x64Gen_writeU32(x64GenContext, memImmU32);
+ }
+ else if( memRegister == X86_REG_R15 )
+ {
+ // MOVSD <xmmReg>, [R15+<imm>]
+ x64Gen_writeU8(x64GenContext, 0x36);
+ x64Gen_writeU8(x64GenContext, 0xF2);
+ x64Gen_genSSEVEXPrefix2(x64GenContext, memRegister, xmmRegister, false);
+ x64Gen_writeU8(x64GenContext, 0x0F);
+ x64Gen_writeU8(x64GenContext, 0x10);
+ x64Gen_writeU8(x64GenContext, 0x87+(xmmRegister&7)*8);
+ x64Gen_writeU32(x64GenContext, memImmU32);
+ }
+ else
+ {
+ assert_dbg();
+ }
+}
+
void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
// SSE3
// move one double from memory into lower half of a xmm register, leave upper half unchanged(?)
- if( memRegister == REG_NONE )
+ if( memRegister == X86_REG_NONE )
{
	// MOVLPD <xmmReg>, [<imm>]
//x64Gen_writeU8(x64GenContext, 0x66);
@@ -229,7 +258,7 @@ void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmReg
//x64Gen_writeU32(x64GenContext, memImmU32);
assert_dbg();
}
- else if( memRegister == REG_RSP )
+ else if( memRegister == X86_REG_RSP )
{
	// MOVLPD <xmmReg>, [RSP+<imm>]
// 66 0F 12 84 24 - 33 22 11 00
@@ -348,11 +377,11 @@ void x64Gen_mulpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegist
void x64Gen_mulpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
// SSE2
- if (memRegister == REG_NONE)
+ if (memRegister == X86_REG_NONE)
{
assert_dbg();
}
- else if (memRegister == REG_R14)
+ else if (memRegister == X86_REG_R14)
{
x64Gen_writeU8(x64GenContext, 0x66);
x64Gen_writeU8(x64GenContext, (xmmRegister < 8) ? 0x41 : 0x45);
@@ -404,7 +433,7 @@ void x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmR
{
// SSE2
// compare bottom double with double from memory location
- if( memoryReg == REG_R15 )
+ if( memoryReg == X86_REG_R15 )
{
x64Gen_writeU8(x64GenContext, 0x66);
x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true);
@@ -432,7 +461,7 @@ void x64Gen_comiss_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmR
{
// SSE2
// compare bottom float with float from memory location
- if (memoryReg == REG_R15)
+ if (memoryReg == X86_REG_R15)
{
x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true);
x64Gen_writeU8(x64GenContext, 0x0F);
@@ -448,7 +477,7 @@ void x64Gen_orps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRe
{
// SSE2
	// or xmm register with 128 bit value from memory
- if( memReg == REG_R15 )
+ if( memReg == X86_REG_R15 )
{
x64Gen_genSSEVEXPrefix2(x64GenContext, memReg, xmmRegisterDest, false);
x64Gen_writeU8(x64GenContext, 0x0F);
@@ -464,7 +493,7 @@ void x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmR
{
// SSE2
// xor xmm register with 128 bit value from memory
- if( memReg == REG_R15 )
+ if( memReg == X86_REG_R15 )
{
x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); // todo: should be x64Gen_genSSEVEXPrefix2() with memReg?
x64Gen_writeU8(x64GenContext, 0x0F);
@@ -479,11 +508,11 @@ void x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmR
void x64Gen_andpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
// SSE2
- if (memRegister == REG_NONE)
+ if (memRegister == X86_REG_NONE)
{
assert_dbg();
}
- else if (memRegister == REG_R14)
+ else if (memRegister == X86_REG_R14)
{
x64Gen_writeU8(x64GenContext, 0x66);
x64Gen_writeU8(x64GenContext, (xmmRegister < 8) ? 0x41 : 0x45);
@@ -502,7 +531,7 @@ void x64Gen_andps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmR
{
// SSE2
// and xmm register with 128 bit value from memory
- if( memReg == REG_R15 )
+ if( memReg == X86_REG_R15 )
{
x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); // todo: should be x64Gen_genSSEVEXPrefix2() with memReg?
x64Gen_writeU8(x64GenContext, 0x0F);
@@ -528,7 +557,7 @@ void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xm
{
// SSE2
// doubleword integer compare
- if( memReg == REG_R15 )
+ if( memReg == X86_REG_R15 )
{
x64Gen_writeU8(x64GenContext, 0x66);
x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true);
@@ -563,6 +592,16 @@ void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 regis
x64Gen_writeU8(x64GenContext, 0xC0+(registerDest&7)*8+(xmmRegisterSrc&7));
}
+void x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc)
+{
+ // SSE2
+ x64Gen_writeU8(x64GenContext, 0xF2);
+ x64Gen_genSSEVEXPrefix2(x64GenContext, registerSrc, xmmRegisterDest, false);
+ x64Gen_writeU8(x64GenContext, 0x0F);
+ x64Gen_writeU8(x64GenContext, 0x2A);
+ x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(registerSrc&7));
+}
+
void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
// SSE2
@@ -610,7 +649,7 @@ void x64Gen_cvtpi2pd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xm
{
// SSE2
// converts two signed 32bit integers to two doubles
- if( memReg == REG_RSP )
+ if( memReg == X86_REG_RSP )
{
x64Gen_writeU8(x64GenContext, 0x66);
x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, false);
@@ -684,7 +723,7 @@ void x64Gen_rcpss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegist
void x64Gen_mulss_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
// SSE2
- if( memRegister == REG_NONE )
+ if( memRegister == X86_REG_NONE )
{
assert_dbg();
}
diff --git a/src/Cafe/HW/Espresso/Recompiler/x64Emit.hpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp
similarity index 99%
rename from src/Cafe/HW/Espresso/Recompiler/x64Emit.hpp
rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp
index e936f1d8..b4021931 100644
--- a/src/Cafe/HW/Espresso/Recompiler/x64Emit.hpp
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp
@@ -203,7 +203,6 @@ template
void _x64Gen_writeMODRM_internal(x64GenContext_t* x64GenContext, TA opA, TB opB)
{
static_assert(TA::getType() == MODRM_OPR_TYPE::REG);
- x64Gen_checkBuffer(x64GenContext);
// REX prefix
// 0100 WRXB
if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::REG)
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h
new file mode 100644
index 00000000..eae3835d
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h
@@ -0,0 +1,4335 @@
+#pragma once
+
+// x86-64 assembler/emitter
+// auto generated. Do not edit this file manually
+
+typedef unsigned long long u64;
+typedef unsigned int u32;
+typedef unsigned short u16;
+typedef unsigned char u8;
+typedef signed long long s64;
+typedef signed int s32;
+typedef signed short s16;
+typedef signed char s8;
+
+enum X86Reg : sint8
+{
+ X86_REG_NONE = -1,
+ X86_REG_EAX = 0,
+ X86_REG_ECX = 1,
+ X86_REG_EDX = 2,
+ X86_REG_EBX = 3,
+ X86_REG_ESP = 4,
+ X86_REG_EBP = 5,
+ X86_REG_ESI = 6,
+ X86_REG_EDI = 7,
+ X86_REG_R8D = 8,
+ X86_REG_R9D = 9,
+ X86_REG_R10D = 10,
+ X86_REG_R11D = 11,
+ X86_REG_R12D = 12,
+ X86_REG_R13D = 13,
+ X86_REG_R14D = 14,
+ X86_REG_R15D = 15,
+ X86_REG_RAX = 0,
+ X86_REG_RCX = 1,
+ X86_REG_RDX = 2,
+ X86_REG_RBX = 3,
+ X86_REG_RSP = 4,
+ X86_REG_RBP = 5,
+ X86_REG_RSI = 6,
+ X86_REG_RDI = 7,
+ X86_REG_R8 = 8,
+ X86_REG_R9 = 9,
+ X86_REG_R10 = 10,
+ X86_REG_R11 = 11,
+ X86_REG_R12 = 12,
+ X86_REG_R13 = 13,
+ X86_REG_R14 = 14,
+ X86_REG_R15 = 15
+};
+
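Note that the 32-bit and 64-bit enumerators intentionally share encodings; whether a 32-bit or 64-bit register is meant is decided by the method suffix (_dd vs _qq) and the REX.W bit, not by the enum value. A small illustrative check:

static_assert(X86_REG_EAX == X86_REG_RAX && X86_REG_R15D == X86_REG_R15,
              "register numbers alias across operand sizes");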
+enum X86Cond : u8
+{
+ X86_CONDITION_O = 0,
+ X86_CONDITION_NO = 1,
+ X86_CONDITION_B = 2,
+ X86_CONDITION_NB = 3,
+ X86_CONDITION_Z = 4,
+ X86_CONDITION_NZ = 5,
+ X86_CONDITION_BE = 6,
+ X86_CONDITION_NBE = 7,
+ X86_CONDITION_S = 8,
+ X86_CONDITION_NS = 9,
+ X86_CONDITION_PE = 10,
+ X86_CONDITION_PO = 11,
+ X86_CONDITION_L = 12,
+ X86_CONDITION_NL = 13,
+ X86_CONDITION_LE = 14,
+ X86_CONDITION_NLE = 15
+};
+class x86Assembler64
+{
+private:
+ std::vector<u8> m_buffer;
+
+public:
+ u8* GetBufferPtr() { return m_buffer.data(); };
+ std::span<u8> GetBuffer() { return m_buffer; };
+ u32 GetWriteIndex() { return (u32)m_buffer.size(); };
+ void _emitU8(u8 v) { m_buffer.emplace_back(v); };
+ void _emitU16(u16 v) { size_t writeIdx = m_buffer.size(); m_buffer.resize(writeIdx + 2); *(u16*)(m_buffer.data() + writeIdx) = v; };
+ void _emitU32(u32 v) { size_t writeIdx = m_buffer.size(); m_buffer.resize(writeIdx + 4); *(u32*)(m_buffer.data() + writeIdx) = v; };
+ void _emitU64(u64 v) { size_t writeIdx = m_buffer.size(); m_buffer.resize(writeIdx + 8); *(u64*)(m_buffer.data() + writeIdx) = v; };
+ using GPR64 = X86Reg;
+ using GPR32 = X86Reg;
+ using GPR8_REX = X86Reg;
+ void LockPrefix() { _emitU8(0xF0); };
+ void ADD_bb(GPR8_REX dst, GPR8_REX src)
+ {
+ if ((src >= 4) || (dst >= 4))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x00);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
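In the byte-register forms the REX prefix is emitted whenever either operand index is 4 or higher: with REX present, encodings 4-7 select SPL/BPL/SIL/DIL instead of the legacy AH/CH/DH/BH, and indices 8-15 additionally need the REX.R/REX.B bits. A small usage sketch of the resulting encodings (illustrative, using only names defined in this header):

x86Assembler64 a;
a.ADD_bb(X86_REG_RSI, X86_REG_RDI); // add sil, dil -> 40 00 FE (REX required for SIL/DIL)
a.ADD_dd(X86_REG_EAX, X86_REG_ECX); // add eax, ecx -> 01 C8    (no prefix needed)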
+ void ADD_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x00);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
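All of the memory-operand forms below repeat the same addressing-mode selection: mod=0 when the displacement is zero and the base is not RBP/R13 (that r/m encoding is reserved for RIP-relative/disp32 addressing), mod=1 for a displacement that fits in a signed byte, mod=2 otherwise, plus a SIB byte whenever an index register is given or the base is RSP/R12. The displacement-width choice, factored out here only as a sketch (the helper name is not part of the header):

// The displacement-width selection used by every *_l/*_r memory form in this file:
static uint8 SelectModField(sint32 offset, X86Reg memReg)
{
    if (offset == 0 && (memReg & 7) != 5)
        return 0; // no displacement byte (RBP/R13 excluded, they require one)
    if (offset == (s32)(s8)offset)
        return 1; // sign-extended 8-bit displacement
    return 2;     // full 32-bit displacement
}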
+ void ADD_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x02);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADD_dd(GPR32 dst, GPR32 src)
+ {
+ if (((src & 8) != 0) || ((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x01);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void ADD_qq(GPR64 dst, GPR64 src)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ _emitU8(0x01);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void ADD_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src & 8) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x01);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADD_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x01);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADD_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst & 8) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x03);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADD_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x03);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void OR_bb(GPR8_REX dst, GPR8_REX src)
+ {
+ if ((src >= 4) || (dst >= 4))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x08);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void OR_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x08);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void OR_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x0a);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void OR_dd(GPR32 dst, GPR32 src)
+ {
+ if (((src & 8) != 0) || ((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x09);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void OR_qq(GPR64 dst, GPR64 src)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ _emitU8(0x09);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void OR_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src & 8) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x09);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void OR_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x09);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void OR_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst & 8) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x0b);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void OR_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x0b);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADC_bb(GPR8_REX dst, GPR8_REX src)
+ {
+ if ((src >= 4) || (dst >= 4))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x10);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void ADC_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x10);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADC_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x12);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADC_dd(GPR32 dst, GPR32 src)
+ {
+ if (((src & 8) != 0) || ((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x11);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void ADC_qq(GPR64 dst, GPR64 src)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ _emitU8(0x11);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void ADC_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src & 8) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x11);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADC_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x11);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADC_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst & 8) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x13);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void ADC_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x13);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SBB_bb(GPR8_REX dst, GPR8_REX src)
+ {
+ if ((src >= 4) || (dst >= 4))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x18);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void SBB_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x18);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SBB_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x1a);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SBB_dd(GPR32 dst, GPR32 src)
+ {
+ if (((src & 8) != 0) || ((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x19);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void SBB_qq(GPR64 dst, GPR64 src)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ _emitU8(0x19);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void SBB_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src & 8) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x19);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SBB_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x19);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SBB_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst & 8) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x1b);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SBB_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x1b);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void AND_bb(GPR8_REX dst, GPR8_REX src)
+ {
+ if ((src >= 4) || (dst >= 4))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x20);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void AND_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x20);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void AND_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x22);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void AND_dd(GPR32 dst, GPR32 src)
+ {
+ if (((src & 8) != 0) || ((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x21);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void AND_qq(GPR64 dst, GPR64 src)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ _emitU8(0x21);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void AND_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src & 8) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x21);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void AND_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x21);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void AND_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst & 8) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x23);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void AND_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x23);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SUB_bb(GPR8_REX dst, GPR8_REX src)
+ {
+ if ((src >= 4) || (dst >= 4))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x28);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void SUB_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x28);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SUB_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x2a);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SUB_dd(GPR32 dst, GPR32 src)
+ {
+ if (((src & 8) != 0) || ((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x29);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void SUB_qq(GPR64 dst, GPR64 src)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ _emitU8(0x29);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void SUB_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src & 8) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x29);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SUB_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x29);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SUB_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst & 8) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x2b);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SUB_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x2b);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
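+ // XOR: opcode 0x30/0x31 (r/m, r) and 0x32/0x33 (r, r/m).
+ // e.g. XOR_dd with dst = src = eax (0) emits 31 C0 (xor eax, eax), the usual zeroing idiom.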
+ void XOR_bb(GPR8_REX dst, GPR8_REX src)
+ {
+ if ((src >= 4) || (dst >= 4))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x30);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void XOR_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x30);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void XOR_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x32);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void XOR_dd(GPR32 dst, GPR32 src)
+ {
+ if (((src & 8) != 0) || ((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x31);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void XOR_qq(GPR64 dst, GPR64 src)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ _emitU8(0x31);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void XOR_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src & 8) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x31);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void XOR_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x31);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void XOR_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst & 8) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x33);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void XOR_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x33);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
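+ // CMP: opcode 0x38/0x39 (r/m, r) and 0x3A/0x3B (r, r/m); only the flags are updated.
+ // e.g. CMP_dd with dst = eax (0) and src = edx (2) emits 39 D0 (cmp eax, edx).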
+ void CMP_bb(GPR8_REX dst, GPR8_REX src)
+ {
+ if ((src >= 4) || (dst >= 4))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x38);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void CMP_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x38);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void CMP_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x3a);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void CMP_dd(GPR32 dst, GPR32 src)
+ {
+ if (((src & 8) != 0) || ((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x39);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void CMP_qq(GPR64 dst, GPR64 src)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ _emitU8(0x39);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void CMP_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src & 8) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x39);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void CMP_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x39);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void CMP_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst & 8) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x3b);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void CMP_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x3b);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
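+ // Group-1 immediate forms, opcode 0x81 with a 32-bit immediate. The ModRM reg field selects
+ // the operation: /0 ADD, /1 OR, /2 ADC, /3 SBB, /4 AND, /5 SUB, /6 XOR, /7 CMP.
+ // e.g. AND_qi32 with dst = rax (0) and imm = 0xFF emits 48 81 E0 FF 00 00 00 (and rax, 0xFF).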
+ void ADD_di32(GPR32 dst, s32 imm)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0x81);
+ _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7));
+ _emitU32((u32)imm);
+ }
+ void ADD_qi32(GPR64 dst, s32 imm)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0x81);
+ _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7));
+ _emitU32((u32)imm);
+ }
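+ // In the memory-destination immediate forms the immediate is appended after the
+ // ModRM/SIB/displacement bytes.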
+ void ADD_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x81);
+ _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
+ void ADD_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x81);
+ _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
+ void OR_di32(GPR32 dst, s32 imm)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0x81);
+ _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7));
+ _emitU32((u32)imm);
+ }
+ void OR_qi32(GPR64 dst, s32 imm)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0x81);
+ _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7));
+ _emitU32((u32)imm);
+ }
+ void OR_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x81);
+ _emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
+ void OR_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x81);
+ _emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
+ void ADC_di32(GPR32 dst, s32 imm)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0x81);
+ _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7));
+ _emitU32((u32)imm);
+ }
+ void ADC_qi32(GPR64 dst, s32 imm)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0x81);
+ _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7));
+ _emitU32((u32)imm);
+ }
+ void ADC_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x81);
+ _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
+ void ADC_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x81);
+ _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
+ void SBB_di32(GPR32 dst, s32 imm)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0x81);
+ _emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7));
+ _emitU32((u32)imm);
+ }
+ void SBB_qi32(GPR64 dst, s32 imm)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0x81);
+ _emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7));
+ _emitU32((u32)imm);
+ }
+ void SBB_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x81);
+ _emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
+ void SBB_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x81);
+ _emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
+ void AND_di32(GPR32 dst, s32 imm)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0x81);
+ _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
+ _emitU32((u32)imm);
+ }
+ void AND_qi32(GPR64 dst, s32 imm)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0x81);
+ _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
+ _emitU32((u32)imm);
+ }
+ void AND_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x81);
+ _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
+ void AND_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x81);
+ _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
+ void SUB_di32(GPR32 dst, s32 imm)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0x81);
+ _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7));
+ _emitU32((u32)imm);
+ }
+ void SUB_qi32(GPR64 dst, s32 imm)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0x81);
+ _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7));
+ _emitU32((u32)imm);
+ }
+ void SUB_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x81);
+ _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
+ void SUB_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x81);
+ _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
+ void XOR_di32(GPR32 dst, s32 imm)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0x81);
+ _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7));
+ _emitU32((u32)imm);
+ }
+ void XOR_qi32(GPR64 dst, s32 imm)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0x81);
+ _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7));
+ _emitU32((u32)imm);
+ }
+ void XOR_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x81);
+ _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
+ void XOR_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x81);
+ _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
+ void CMP_di32(GPR32 dst, s32 imm)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0x81);
+ _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7));
+ _emitU32((u32)imm);
+ }
+ void CMP_qi32(GPR64 dst, s32 imm)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0x81);
+ _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7));
+ _emitU32((u32)imm);
+ }
+ void CMP_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x81);
+ _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
+ void CMP_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x81);
+ _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
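+ // Group-1 immediate forms, opcode 0x83 with a sign-extended 8-bit immediate; same /0../7
+ // operation selectors as the 0x81 forms above.
+ // e.g. ADD_qi8 with dst = rcx (1) and imm = 1 emits 48 83 C1 01 (add rcx, 1).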
+ void ADD_di8(GPR32 dst, s8 imm)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0x83);
+ _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7));
+ _emitU8((u8)imm);
+ }
+ void ADD_qi8(GPR64 dst, s8 imm)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0x83);
+ _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7));
+ _emitU8((u8)imm);
+ }
+ void ADD_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x83);
+ _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void ADD_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x83);
+ _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void OR_di8(GPR32 dst, s8 imm)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0x83);
+ _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7));
+ _emitU8((u8)imm);
+ }
+ void OR_qi8(GPR64 dst, s8 imm)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0x83);
+ _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7));
+ _emitU8((u8)imm);
+ }
+ void OR_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x83);
+ _emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void OR_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x83);
+ _emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void ADC_di8(GPR32 dst, s8 imm)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0x83);
+ _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7));
+ _emitU8((u8)imm);
+ }
+ void ADC_qi8(GPR64 dst, s8 imm)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0x83);
+ _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7));
+ _emitU8((u8)imm);
+ }
+ void ADC_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x83);
+ _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void ADC_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x83);
+ _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void SBB_di8(GPR32 dst, s8 imm)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0x83);
+ _emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7));
+ _emitU8((u8)imm);
+ }
+ void SBB_qi8(GPR64 dst, s8 imm)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0x83);
+ _emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7));
+ _emitU8((u8)imm);
+ }
+ void SBB_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x83);
+ _emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void SBB_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x83);
+ _emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void AND_di8(GPR32 dst, s8 imm)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0x83);
+ _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
+ _emitU8((u8)imm);
+ }
+ void AND_qi8(GPR64 dst, s8 imm)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0x83);
+ _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
+ _emitU8((u8)imm);
+ }
+ void AND_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x83);
+ _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void AND_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x83);
+ _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void SUB_di8(GPR32 dst, s8 imm)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0x83);
+ _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7));
+ _emitU8((u8)imm);
+ }
+ void SUB_qi8(GPR64 dst, s8 imm)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0x83);
+ _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7));
+ _emitU8((u8)imm);
+ }
+ void SUB_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x83);
+ _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void SUB_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x83);
+ _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void XOR_di8(GPR32 dst, s8 imm)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0x83);
+ _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7));
+ _emitU8((u8)imm);
+ }
+ void XOR_qi8(GPR64 dst, s8 imm)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0x83);
+ _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7));
+ _emitU8((u8)imm);
+ }
+ void XOR_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x83);
+ _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void XOR_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x83);
+ _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void CMP_di8(GPR32 dst, s8 imm)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0x83);
+ _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7));
+ _emitU8((u8)imm);
+ }
+ void CMP_qi8(GPR64 dst, s8 imm)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0x83);
+ _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7));
+ _emitU8((u8)imm);
+ }
+ void CMP_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x83);
+ _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void CMP_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x83);
+ _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
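+ // TEST: opcode 0x84 (8-bit) / 0x85 (32/64-bit); ANDs the operands and updates only the flags.
+ // e.g. TEST_dd with dst = ecx (1) and src = eax (0) emits 85 C1 (test ecx, eax).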
+ void TEST_bb(GPR8_REX dst, GPR8_REX src)
+ {
+ if ((src >= 4) || (dst >= 4))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x84);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void TEST_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x84);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void TEST_dd(GPR32 dst, GPR32 src)
+ {
+ if (((src & 8) != 0) || ((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x85);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void TEST_qq(GPR64 dst, GPR64 src)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ _emitU8(0x85);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void TEST_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src & 8) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x85);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void TEST_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x85);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
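+ // XCHG: opcode 0x86 (8-bit) / 0x87 (32/64-bit). The memory forms are implicitly locking on
+ // x86, so they act as atomic swaps.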
+ void XCHG_bb(GPR8_REX dst, GPR8_REX src)
+ {
+ if ((dst >= 4) || (src >= 4))
+ {
+ _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1));
+ }
+ _emitU8(0x86);
+ _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7));
+ }
+ void XCHG_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x86);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void XCHG_dd(GPR32 dst, GPR32 src)
+ {
+ if (((dst & 8) != 0) || ((src & 8) != 0))
+ {
+ _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1));
+ }
+ _emitU8(0x87);
+ _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7));
+ }
+ void XCHG_qq(GPR64 dst, GPR64 src)
+ {
+ _emitU8(0x48 | ((src & 8) >> 3) | ((dst & 8) >> 1));
+ _emitU8(0x87);
+ _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7));
+ }
+ void XCHG_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst & 8) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x87);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void XCHG_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x87);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void MOV_bb(GPR8_REX dst, GPR8_REX src)
+ {
+ if ((src >= 4) || (dst >= 4))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x88);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void MOV_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x88);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void MOV_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst >= 4) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x8a);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void MOV_dd(GPR32 dst, GPR32 src)
+ {
+ if (((src & 8) != 0) || ((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x89);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void MOV_qq(GPR64 dst, GPR64 src)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ _emitU8(0x89);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void MOV_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src & 8) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x89);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void MOV_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x89);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void MOV_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst & 8) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x8b);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void MOV_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x8b);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void MOV_di32(GPR32 dst, s32 imm)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0xb8 | ((dst) & 7));
+ _emitU32((u32)imm);
+ }
+ void MOV_qi64(GPR64 dst, s64 imm)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0xb8 | ((dst) & 7));
+ _emitU64((u64)imm);
+ }
+ void CALL_q(GPR64 dst)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0xff);
+ _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7));
+ }
+ void CALL_q_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0xff);
+ _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void IMUL_ddi32(GPR32 dst, GPR32 src, s32 imm)
+ {
+ if (((dst & 8) != 0) || ((src & 8) != 0))
+ {
+ _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1));
+ }
+ _emitU8(0x69);
+ _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7));
+ _emitU32((u32)imm);
+ }
+ void IMUL_qqi32(GPR64 dst, GPR64 src, s32 imm)
+ {
+ _emitU8(0x48 | ((src & 8) >> 3) | ((dst & 8) >> 1));
+ _emitU8(0x69);
+ _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7));
+ _emitU32((u32)imm);
+ }
+ void IMUL_ddi32_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst & 8) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x69);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
+ void IMUL_qqi32_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x69);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU32((u32)imm);
+ }
+ void IMUL_ddi8(GPR32 dst, GPR32 src, s8 imm)
+ {
+ if (((dst & 8) != 0) || ((src & 8) != 0))
+ {
+ _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1));
+ }
+ _emitU8(0x6b);
+ _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7));
+ _emitU8((u8)imm);
+ }
+ void IMUL_qqi8(GPR64 dst, GPR64 src, s8 imm)
+ {
+ _emitU8(0x48 | ((src & 8) >> 3) | ((dst & 8) >> 1));
+ _emitU8(0x6b);
+ _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7));
+ _emitU8((u8)imm);
+ }
+ void IMUL_ddi8_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((dst & 8) || (memReg & 8))
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x6b);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void IMUL_qqi8_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x6b);
+ _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void SHL_b_CL(GPR8_REX dst)
+ {
+ if ((dst >= 4))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0xd2);
+ _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
+ }
+ void SHL_b_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0xd2);
+ _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SHR_b_CL(GPR8_REX dst)
+ {
+ if ((dst >= 4))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0xd2);
+ _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7));
+ }
+ void SHR_b_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0xd2);
+ _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SAR_b_CL(GPR8_REX dst)
+ {
+ if ((dst >= 4))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0xd2);
+ _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7));
+ }
+ void SAR_b_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0xd2);
+ _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SHL_d_CL(GPR32 dst)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0xd3);
+ _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
+ }
+ void SHL_q_CL(GPR64 dst)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0xd3);
+ _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
+ }
+ void SHL_d_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0xd3);
+ _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SHL_q_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0xd3);
+ _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SHR_d_CL(GPR32 dst)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0xd3);
+ _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7));
+ }
+ void SHR_q_CL(GPR64 dst)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0xd3);
+ _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7));
+ }
+ void SHR_d_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0xd3);
+ _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SHR_q_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0xd3);
+ _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SAR_d_CL(GPR32 dst)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0xd3);
+ _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7));
+ }
+ void SAR_q_CL(GPR64 dst)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0xd3);
+ _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7));
+ }
+ void SAR_d_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0xd3);
+ _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void SAR_q_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0xd3);
+ _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void JMP_j32(s32 imm)
+ {
+ _emitU8(0xe9);
+ _emitU32((u32)imm);
+ }
+ void Jcc_j32(X86Cond cond, s32 imm)
+ {
+ _emitU8(0x0f);
+ _emitU8(0x80 | (u8)cond);
+ _emitU32((u32)imm);
+ }
+ void SETcc_b(X86Cond cond, GPR8_REX dst)
+ {
+ if ((dst >= 4))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0x0f);
+ _emitU8(0x90 | (u8)cond);
+ _emitU8((3 << 6) | (dst & 7));
+ }
+ void SETcc_b_l(X86Cond cond, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x0f);
+ _emitU8(0x90 | (u8)cond);
+ _emitU8((mod << 6) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void CMPXCHG_dd(GPR32 dst, GPR32 src)
+ {
+ if (((src & 8) != 0) || ((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ }
+ _emitU8(0x0f);
+ _emitU8(0xb1);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void CMPXCHG_qq(GPR64 dst, GPR64 src)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
+ _emitU8(0x0f);
+ _emitU8(0xb1);
+ _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
+ }
+ void CMPXCHG_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((src & 8) || (memReg & 8))
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x0f);
+ _emitU8(0xb1);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void CMPXCHG_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x0f);
+ _emitU8(0xb1);
+ _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ }
+ void BSWAP_d(GPR32 dst)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0x0f);
+ _emitU8(0xc8 | ((dst) & 7));
+ }
+ void BSWAP_q(GPR64 dst)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0x0f);
+ _emitU8(0xc8 | ((dst) & 7));
+ }
+ void BT_du8(GPR32 dst, u8 imm)
+ {
+ if (((dst & 8) != 0))
+ {
+ _emitU8(0x40 | ((dst & 8) >> 3));
+ }
+ _emitU8(0x0f);
+ _emitU8(0xba);
+ _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
+ _emitU8((u8)imm);
+ }
+ void BT_qu8(GPR64 dst, u8 imm)
+ {
+ _emitU8(0x48 | ((dst & 8) >> 3));
+ _emitU8(0x0f);
+ _emitU8(0xba);
+ _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
+ _emitU8((u8)imm);
+ }
+ void BT_du8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, u8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+ }
+ else
+ {
+ if ((memReg & 8))
+ _emitU8(0x40 | ((memReg & 8) >> 1));
+ }
+ _emitU8(0x0f);
+ _emitU8(0xba);
+ _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+ void BT_qu8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, u8 imm)
+ {
+ uint8 mod;
+ if (offset == 0 && (memReg & 7) != 5) mod = 0;
+ else if (offset == (s32)(s8)offset) mod = 1;
+ else mod = 2;
+ bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+ if ((memReg & 7) == 4)
+ {
+ cemu_assert_debug(index == X86_REG_NONE);
+ index = memReg;
+ sib_use = true;
+ }
+ if (sib_use)
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+ }
+ else
+ {
+ _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
+ }
+ _emitU8(0x0f);
+ _emitU8(0xba);
+ _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+ if (sib_use)
+ {
+ _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+ }
+ if (mod == 1) _emitU8((u8)offset);
+ else if (mod == 2) _emitU32((u32)offset);
+ _emitU8((u8)imm);
+ }
+};
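Every encoder in the class above follows the same skeleton: an optional REX prefix (base 0x40; W=8 for 64-bit operand size, R=4/X=2/B=1 for an extended reg/index/base register), the opcode, a ModRM byte (mod/reg/rm), a SIB byte only when an index register is used or the base register is RSP/R12, and an 8-bit or 32-bit displacement selected by mod. As a quick sanity check of that bit math, a small illustration (not part of the patch; register constants are assumed to follow the hardware numbering implied by the & 7 / & 8 masks, i.e. RAX=0, RCX=1, RDX=2):

    // Illustration only: expected byte output of two encoders from the class above.
    // MOV_qq(RAX, RCX)                           -> 48 89 C8       ; mov rax, rcx
    //   REX.W = 0x48, opcode 0x89, ModRM 0xC8 (mod=3, reg=RCX, rm=RAX)
    // MOV_qq_r(RAX, RDX, 0x10, X86_REG_NONE, 0)  -> 48 8B 42 10    ; mov rax, [rdx+0x10]
    //   REX.W = 0x48, opcode 0x8B, ModRM 0x42 (mod=1, reg=RAX, rm=RDX), disp8 = 0x10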
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h
new file mode 100644
index 00000000..bc0c27c5
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "IMLInstruction.h"
+#include "IMLSegment.h"
+
+// optimizer passes
+void IMLOptimizer_OptimizeDirectFloatCopies(struct ppcImlGenContext_t* ppcImlGenContext);
+void IMLOptimizer_OptimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcImlGenContext);
+void PPCRecompiler_optimizePSQLoadAndStore(struct ppcImlGenContext_t* ppcImlGenContext);
+
+void IMLOptimizer_StandardOptimizationPass(ppcImlGenContext_t& ppcImlGenContext);
+
+// debug
+void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& disassemblyLineOut);
+void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo = false);
+void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo = false);
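The header only declares the entry points; a typical debugging flow, as a minimal sketch (assuming ctx is a populated ppcImlGenContext_t* from the PPC-to-IML translation step and inst is an IMLInstruction taken from one of its segments), looks like this:

    // Minimal sketch, not part of the patch: optimize, then dump all segments to the log.
    IMLOptimizer_StandardOptimizationPass(*ctx);
    IMLDebug_Dump(ctx);

    // Or disassemble a single instruction into a string:
    std::string line;
    IMLDebug_DisassembleInstruction(inst, line);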
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp
new file mode 100644
index 00000000..6ae4b591
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp
@@ -0,0 +1,5 @@
+#include "IML.h"
+//#include "PPCRecompilerIml.h"
+#include "util/helpers/fixedSizeList.h"
+
+#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp
new file mode 100644
index 00000000..cd269869
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp
@@ -0,0 +1,561 @@
+#include "IML.h"
+#include "IMLInstruction.h"
+#include "IMLSegment.h"
+#include "IMLRegisterAllocatorRanges.h"
+#include "util/helpers/StringBuf.h"
+
+#include "../PPCRecompiler.h"
+
+const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml)
+{
+ static char _tempOpcodename[32];
+ uint32 op = iml->operation;
+ if (op == PPCREC_IML_OP_ASSIGN)
+ return "MOV";
+ else if (op == PPCREC_IML_OP_ADD)
+ return "ADD";
+ else if (op == PPCREC_IML_OP_ADD_WITH_CARRY)
+ return "ADC";
+ else if (op == PPCREC_IML_OP_SUB)
+ return "SUB";
+ else if (op == PPCREC_IML_OP_OR)
+ return "OR";
+ else if (op == PPCREC_IML_OP_AND)
+ return "AND";
+ else if (op == PPCREC_IML_OP_XOR)
+ return "XOR";
+ else if (op == PPCREC_IML_OP_LEFT_SHIFT)
+ return "LSH";
+ else if (op == PPCREC_IML_OP_RIGHT_SHIFT_U)
+ return "RSH";
+ else if (op == PPCREC_IML_OP_RIGHT_SHIFT_S)
+ return "ARSH";
+ else if (op == PPCREC_IML_OP_LEFT_ROTATE)
+ return "LROT";
+ else if (op == PPCREC_IML_OP_MULTIPLY_SIGNED)
+ return "MULS";
+ else if (op == PPCREC_IML_OP_DIVIDE_SIGNED)
+ return "DIVS";
+ else if (op == PPCREC_IML_OP_FPR_ASSIGN)
+ return "FMOV";
+ else if (op == PPCREC_IML_OP_FPR_ADD)
+ return "FADD";
+ else if (op == PPCREC_IML_OP_FPR_SUB)
+ return "FSUB";
+ else if (op == PPCREC_IML_OP_FPR_MULTIPLY)
+ return "FMUL";
+ else if (op == PPCREC_IML_OP_FPR_DIVIDE)
+ return "FDIV";
+ else if (op == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64)
+ return "F32TOF64";
+ else if (op == PPCREC_IML_OP_FPR_ABS)
+ return "FABS";
+ else if (op == PPCREC_IML_OP_FPR_NEGATE)
+ return "FNEG";
+ else if (op == PPCREC_IML_OP_FPR_NEGATIVE_ABS)
+ return "FNABS";
+ else if (op == PPCREC_IML_OP_FPR_FLOAT_TO_INT)
+ return "F2I";
+ else if (op == PPCREC_IML_OP_FPR_INT_TO_FLOAT)
+ return "I2F";
+ else if (op == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
+ return "BITMOVE";
+
+ sprintf(_tempOpcodename, "OP0%02x_T%d", iml->operation, iml->type);
+ return _tempOpcodename;
+}
+
+std::string IMLDebug_GetRegName(IMLReg r)
+{
+ std::string regName;
+ uint32 regId = r.GetRegID();
+ switch (r.GetRegFormat())
+ {
+ case IMLRegFormat::F32:
+ regName.append("f");
+ break;
+ case IMLRegFormat::F64:
+ regName.append("fd");
+ break;
+ case IMLRegFormat::I32:
+ regName.append("i");
+ break;
+ case IMLRegFormat::I64:
+ regName.append("r");
+ break;
+ default:
+ DEBUG_BREAK;
+ }
+ regName.append(fmt::format("{}", regId));
+ return regName;
+}
+
+void IMLDebug_AppendRegisterParam(StringBuf& strOutput, IMLReg virtualRegister, bool isLast = false)
+{
+ strOutput.add(IMLDebug_GetRegName(virtualRegister));
+ if (!isLast)
+ strOutput.add(", ");
+}
+
+void IMLDebug_AppendS32Param(StringBuf& strOutput, sint32 val, bool isLast = false)
+{
+ if (val < 0)
+ {
+ strOutput.add("-");
+ val = -val;
+ }
+ strOutput.addFmt("0x{:08x}", val);
+ if (!isLast)
+ strOutput.add(", ");
+}
+
+void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* imlSegment, sint32 offset)
+{
+ // pad to 70 characters
+ sint32 index = currentLineText.getLen();
+ while (index < 70)
+ {
+ currentLineText.add(" ");
+ index++;
+ }
+ raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
+ while (subrangeItr)
+ {
+ if (subrangeItr->interval.start.GetInstructionIndexEx() == offset)
+ {
+ if(subrangeItr->interval.start.IsInstructionIndex() && !subrangeItr->interval.start.IsOnInputEdge())
+ currentLineText.add(".");
+ else
+ currentLineText.add("|");
+
+ currentLineText.addFmt("{:<4}", subrangeItr->GetVirtualRegister());
+ }
+ else if (subrangeItr->interval.end.GetInstructionIndexEx() == offset)
+ {
+ if(subrangeItr->interval.end.IsInstructionIndex() && !subrangeItr->interval.end.IsOnOutputEdge())
+ currentLineText.add("* ");
+ else
+ currentLineText.add("| ");
+ }
+ else if (subrangeItr->interval.ContainsInstructionIndexEx(offset))
+ {
+ currentLineText.add("| ");
+ }
+ else
+ {
+ currentLineText.add(" ");
+ }
+ index += 5;
+ // next
+ subrangeItr = subrangeItr->link_allSegmentRanges.next;
+ }
+}
+
+std::string IMLDebug_GetSegmentName(ppcImlGenContext_t* ctx, IMLSegment* seg)
+{
+ if (!ctx)
+ {
+ return "";
+ }
+ // find segment index
+ for (size_t i = 0; i < ctx->segmentList2.size(); i++)
+ {
+ if (ctx->segmentList2[i] == seg)
+ {
+ return fmt::format("Seg{:04x}", i);
+ }
+ }
+ return "";
+}
+
+std::string IMLDebug_GetConditionName(IMLCondition cond)
+{
+ switch (cond)
+ {
+ case IMLCondition::EQ:
+ return "EQ";
+ case IMLCondition::NEQ:
+ return "NEQ";
+ case IMLCondition::UNSIGNED_GT:
+ return "UGT";
+ case IMLCondition::UNSIGNED_LT:
+ return "ULT";
+ case IMLCondition::SIGNED_GT:
+ return "SGT";
+ case IMLCondition::SIGNED_LT:
+ return "SLT";
+ default:
+ cemu_assert_unimplemented();
+ }
+ return "ukn";
+}
+
+void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& disassemblyLineOut)
+{
+ const sint32 lineOffsetParameters = 10;//18;
+
+ StringBuf strOutput(1024);
+ strOutput.reset();
+ if (inst.type == PPCREC_IML_TYPE_R_NAME || inst.type == PPCREC_IML_TYPE_NAME_R)
+ {
+ if (inst.type == PPCREC_IML_TYPE_R_NAME)
+ strOutput.add("R_NAME");
+ else
+ strOutput.add("NAME_R");
+ while ((sint32)strOutput.getLen() < lineOffsetParameters)
+ strOutput.add(" ");
+
+ if(inst.type == PPCREC_IML_TYPE_R_NAME)
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR);
+
+ strOutput.add("name_");
+ if (inst.op_r_name.name >= PPCREC_NAME_R0 && inst.op_r_name.name < (PPCREC_NAME_R0 + 999))
+ {
+ strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0);
+ }
+ if (inst.op_r_name.name >= PPCREC_NAME_FPR_HALF && inst.op_r_name.name < (PPCREC_NAME_FPR_HALF + 32*2))
+ {
+ strOutput.addFmt("f{}", (inst.op_r_name.name - PPCREC_NAME_FPR_HALF) / 2);
+ if ((inst.op_r_name.name-PPCREC_NAME_FPR_HALF)&1)
+ strOutput.add(".ps1");
+ else
+ strOutput.add(".ps0");
+ }
+ else if (inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0 + 999))
+ {
+ strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0);
+ }
+ else if (inst.op_r_name.name >= PPCREC_NAME_CR && inst.op_r_name.name <= PPCREC_NAME_CR_LAST)
+ strOutput.addFmt("cr{}", inst.op_r_name.name - PPCREC_NAME_CR);
+ else if (inst.op_r_name.name == PPCREC_NAME_XER_CA)
+ strOutput.add("xer.ca");
+ else if (inst.op_r_name.name == PPCREC_NAME_XER_SO)
+ strOutput.add("xer.so");
+ else if (inst.op_r_name.name == PPCREC_NAME_XER_OV)
+ strOutput.add("xer.ov");
+ else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_EA)
+ strOutput.add("cpuReservation.ea");
+ else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_VAL)
+ strOutput.add("cpuReservation.value");
+ else
+ {
+ strOutput.addFmt("name_ukn{}", inst.op_r_name.name);
+ }
+ if (inst.type != PPCREC_IML_TYPE_R_NAME)
+ {
+ strOutput.add(", ");
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR, true);
+ }
+
+ }
+ else if (inst.type == PPCREC_IML_TYPE_R_R)
+ {
+ strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
+ while ((sint32)strOutput.getLen() < lineOffsetParameters)
+ strOutput.add(" ");
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regR);
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regA, true);
+ }
+ else if (inst.type == PPCREC_IML_TYPE_R_R_R)
+ {
+ strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
+ while ((sint32)strOutput.getLen() < lineOffsetParameters)
+ strOutput.add(" ");
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regR);
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regA);
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regB, true);
+ }
+ else if (inst.type == PPCREC_IML_TYPE_R_R_R_CARRY)
+ {
+ strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
+ while ((sint32)strOutput.getLen() < lineOffsetParameters)
+ strOutput.add(" ");
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regR);
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regA);
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regB);
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regCarry, true);
+ }
+ else if (inst.type == PPCREC_IML_TYPE_COMPARE)
+ {
+ strOutput.add("CMP ");
+ while ((sint32)strOutput.getLen() < lineOffsetParameters)
+ strOutput.add(" ");
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regA);
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regB);
+ strOutput.addFmt("{}", IMLDebug_GetConditionName(inst.op_compare.cond));
+ strOutput.add(" -> ");
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regR, true);
+ }
+ else if (inst.type == PPCREC_IML_TYPE_COMPARE_S32)
+ {
+ strOutput.add("CMP ");
+ while ((sint32)strOutput.getLen() < lineOffsetParameters)
+ strOutput.add(" ");
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regA);
+ strOutput.addFmt("{}", inst.op_compare_s32.immS32);
+ strOutput.addFmt(", {}", IMLDebug_GetConditionName(inst.op_compare_s32.cond));
+ strOutput.add(" -> ");
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regR, true);
+ }
+ else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
+ {
+ strOutput.add("CJUMP ");
+ while ((sint32)strOutput.getLen() < lineOffsetParameters)
+ strOutput.add(" ");
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_conditional_jump.registerBool, true);
+ if (!inst.op_conditional_jump.mustBeTrue)
+ strOutput.add("(inverted)");
+ }
+ else if (inst.type == PPCREC_IML_TYPE_JUMP)
+ {
+ strOutput.add("JUMP");
+ }
+ else if (inst.type == PPCREC_IML_TYPE_R_R_S32)
+ {
+ strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
+ while ((sint32)strOutput.getLen() < lineOffsetParameters)
+ strOutput.add(" ");
+
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regR);
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regA);
+ IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32.immS32, true);
+ }
+ else if (inst.type == PPCREC_IML_TYPE_R_R_S32_CARRY)
+ {
+ strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
+ while ((sint32)strOutput.getLen() < lineOffsetParameters)
+ strOutput.add(" ");
+
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regR);
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regA);
+ IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32_carry.immS32);
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regCarry, true);
+ }
+ else if (inst.type == PPCREC_IML_TYPE_R_S32)
+ {
+ strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
+ while ((sint32)strOutput.getLen() < lineOffsetParameters)
+ strOutput.add(" ");
+
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_r_immS32.regR);
+ IMLDebug_AppendS32Param(strOutput, inst.op_r_immS32.immS32, true);
+ }
+ else if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE ||
+ inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED)
+ {
+ if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED)
+ strOutput.add("LD_");
+ else
+ strOutput.add("ST_");
+
+ if (inst.op_storeLoad.flags2.signExtend)
+ strOutput.add("S");
+ else
+ strOutput.add("U");
+ strOutput.addFmt("{}", inst.op_storeLoad.copyWidth);
+
+ while ((sint32)strOutput.getLen() < lineOffsetParameters)
+ strOutput.add(" ");
+
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData);
+
+ if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED)
+ strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), IMLDebug_GetRegName(inst.op_storeLoad.registerMem2));
+ else
+ strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32);
+ }
+ else if (inst.type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
+ {
+ strOutput.add("ATOMIC_ST_U32");
+
+ while ((sint32)strOutput.getLen() < lineOffsetParameters)
+ strOutput.add(" ");
+
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regEA);
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regCompareValue);
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regWriteValue);
+ IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regBoolOut, true);
+ }
+ else if (inst.type == PPCREC_IML_TYPE_NO_OP)
+ {
+ strOutput.add("NOP");
+ }
+ else if (inst.type == PPCREC_IML_TYPE_MACRO)
+ {
+ if (inst.operation == PPCREC_IML_MACRO_B_TO_REG)
+ {
+ strOutput.addFmt("MACRO B_TO_REG {}", IMLDebug_GetRegName(inst.op_macro.paramReg));
+ }
+ else if (inst.operation == PPCREC_IML_MACRO_BL)
+ {
+ strOutput.addFmt("MACRO BL 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16);
+ }
+ else if (inst.operation == PPCREC_IML_MACRO_B_FAR)
+ {
+ strOutput.addFmt("MACRO B_FAR 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16);
+ }
+ else if (inst.operation == PPCREC_IML_MACRO_LEAVE)
+ {
+ strOutput.addFmt("MACRO LEAVE ppc: 0x{:08x}", inst.op_macro.param);
+ }
+ else if (inst.operation == PPCREC_IML_MACRO_HLE)
+ {
+ strOutput.addFmt("MACRO HLE ppcAddr: 0x{:08x} funcId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2);
+ }
+ else if (inst.operation == PPCREC_IML_MACRO_COUNT_CYCLES)
+ {
+ strOutput.addFmt("MACRO COUNT_CYCLES cycles: {}", inst.op_macro.param);
+ }
+ else
+ {
+ strOutput.addFmt("MACRO ukn operation {}", inst.operation);
+ }
+ }
+ else if (inst.type == PPCREC_IML_TYPE_FPR_LOAD)
+ {
+ strOutput.addFmt("{} = ", IMLDebug_GetRegName(inst.op_storeLoad.registerData));
+ if (inst.op_storeLoad.flags2.signExtend)
+ strOutput.add("S");
+ else
+ strOutput.add("U");
+ strOutput.addFmt("{} [{}+{}] mode {}", inst.op_storeLoad.copyWidth / 8, IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32, inst.op_storeLoad.mode);
+ if (inst.op_storeLoad.flags2.notExpanded)
+ {
+ strOutput.addFmt(" ");
+ }
+ }
+ else if (inst.type == PPCREC_IML_TYPE_FPR_STORE)
+ {
+ if (inst.op_storeLoad.flags2.signExtend)
+ strOutput.add("S");
+ else
+ strOutput.add("U");
+ strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.immS32);
+ strOutput.addFmt(" = {} mode {}", IMLDebug_GetRegName(inst.op_storeLoad.registerData), inst.op_storeLoad.mode);
+ }
+ else if (inst.type == PPCREC_IML_TYPE_FPR_R)
+ {
+ strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
+ strOutput.addFmt("{}", IMLDebug_GetRegName(inst.op_fpr_r.regR));
+ }
+ else if (inst.type == PPCREC_IML_TYPE_FPR_R_R)
+ {
+ strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
+ strOutput.addFmt("{}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r.regA));
+ }
+ else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R)
+ {
+ strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
+ strOutput.addFmt("{}, {}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regB), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regC));
+ }
+ else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R)
+ {
+ strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
+ strOutput.addFmt("{}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regB));
+ }
+ else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
+ {
+ strOutput.addFmt("CYCLE_CHECK");
+ }
+ else if (inst.type == PPCREC_IML_TYPE_X86_EFLAGS_JCC)
+ {
+ strOutput.addFmt("X86_JCC {}", IMLDebug_GetConditionName(inst.op_x86_eflags_jcc.cond));
+ }
+ else
+ {
+ strOutput.addFmt("Unknown iml type {}", inst.type);
+ }
+ disassemblyLineOut.assign(strOutput.c_str());
+}
+
+void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo)
+{
+ StringBuf strOutput(4096);
+
+ strOutput.addFmt("SEGMENT {} | PPC=0x{:08x} Loop-depth {}", IMLDebug_GetSegmentName(ctx, imlSegment), imlSegment->ppcAddress, imlSegment->loopDepth);
+ if (imlSegment->isEnterable)
+ {
+ strOutput.addFmt(" ENTERABLE (0x{:08x})", imlSegment->enterPPCAddress);
+ }
+ if (imlSegment->deadCodeEliminationHintSeg)
+ {
+ strOutput.addFmt(" InheritOverwrite: {}", IMLDebug_GetSegmentName(ctx, imlSegment->deadCodeEliminationHintSeg));
+ }
+ cemuLog_log(LogType::Force, "{}", strOutput.c_str());
+
+ if (printLivenessRangeInfo)
+ {
+ strOutput.reset();
+ IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START);
+ cemuLog_log(LogType::Force, "{}", strOutput.c_str());
+ }
+ //debug_printf("\n");
+ strOutput.reset();
+
+ std::string disassemblyLine;
+ for (sint32 i = 0; i < imlSegment->imlList.size(); i++)
+ {
+ const IMLInstruction& inst = imlSegment->imlList[i];
+ // don't log NOP instructions
+ if (inst.type == PPCREC_IML_TYPE_NO_OP)
+ continue;
+ strOutput.reset();
+ strOutput.addFmt("{:02x} ", i);
+ //cemuLog_log(LogType::Force, "{:02x} ", i);
+ disassemblyLine.clear();
+ IMLDebug_DisassembleInstruction(inst, disassemblyLine);
+ strOutput.add(disassemblyLine);
+ if (printLivenessRangeInfo)
+ {
+ IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, i);
+ }
+ cemuLog_log(LogType::Force, "{}", strOutput.c_str());
+ }
+ // all ranges
+ if (printLivenessRangeInfo)
+ {
+ strOutput.reset();
+ strOutput.add("Ranges-VirtReg ");
+ raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
+ while (subrangeItr)
+ {
+ strOutput.addFmt("v{:<4}", (uint32)subrangeItr->GetVirtualRegister());
+ subrangeItr = subrangeItr->link_allSegmentRanges.next;
+ }
+ cemuLog_log(LogType::Force, "{}", strOutput.c_str());
+ strOutput.reset();
+ strOutput.add("Ranges-PhysReg ");
+ subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
+ while (subrangeItr)
+ {
+ strOutput.addFmt("p{:<4}", subrangeItr->GetPhysicalRegister());
+ subrangeItr = subrangeItr->link_allSegmentRanges.next;
+ }
+ cemuLog_log(LogType::Force, "{}", strOutput.c_str());
+ }
+ // branch info
+ strOutput.reset();
+ strOutput.add("Links from: ");
+ for (sint32 i = 0; i < imlSegment->list_prevSegments.size(); i++)
+ {
+ if (i)
+ strOutput.add(", ");
+ strOutput.addFmt("{}", IMLDebug_GetSegmentName(ctx, imlSegment->list_prevSegments[i]).c_str());
+ }
+ cemuLog_log(LogType::Force, "{}", strOutput.c_str());
+ if (imlSegment->nextSegmentBranchNotTaken)
+ cemuLog_log(LogType::Force, "BranchNotTaken: {}", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchNotTaken).c_str());
+ if (imlSegment->nextSegmentBranchTaken)
+ cemuLog_log(LogType::Force, "BranchTaken: {}", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchTaken).c_str());
+ if (imlSegment->nextSegmentIsUncertain)
+ cemuLog_log(LogType::Force, "Dynamic target");
+}
+
+void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo)
+{
+ for (size_t i = 0; i < ppcImlGenContext->segmentList2.size(); i++)
+ {
+ IMLDebug_DumpSegment(ppcImlGenContext, ppcImlGenContext->segmentList2[i], printLivenessRangeInfo);
+ cemuLog_log(LogType::Force, "");
+ }
+}
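For reference, the dump layout follows directly from the format strings above; with made-up register and address values (illustrative only, not captured output), a segment without liveness info prints roughly as:

    SEGMENT Seg0003 | PPC=0x02a41b20 Loop-depth 0
    00 MOV       r5, r2
    01 ADD       r6, r5, r3
    Links from: Seg0002
    BranchTaken: Seg0005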
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp
new file mode 100644
index 00000000..997de4e9
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp
@@ -0,0 +1,536 @@
+#include "IMLInstruction.h"
+#include "IML.h"
+
+#include "../PPCRecompiler.h"
+#include "../PPCRecompilerIml.h"
+
+// return true if an instruction has side effects on top of just reading and writing registers
+bool IMLInstruction::HasSideEffects() const
+{
+ bool hasSideEffects = true;
+ if(type == PPCREC_IML_TYPE_R_R || type == PPCREC_IML_TYPE_R_R_S32 || type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32)
+ hasSideEffects = false;
+ // todo - add more cases
+ return hasSideEffects;
+}
+
+void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
+{
+ registersUsed->readGPR1 = IMLREG_INVALID;
+ registersUsed->readGPR2 = IMLREG_INVALID;
+ registersUsed->readGPR3 = IMLREG_INVALID;
+ registersUsed->readGPR4 = IMLREG_INVALID;
+ registersUsed->writtenGPR1 = IMLREG_INVALID;
+ registersUsed->writtenGPR2 = IMLREG_INVALID;
+ if (type == PPCREC_IML_TYPE_R_NAME)
+ {
+ registersUsed->writtenGPR1 = op_r_name.regR;
+ }
+ else if (type == PPCREC_IML_TYPE_NAME_R)
+ {
+ registersUsed->readGPR1 = op_r_name.regR;
+ }
+ else if (type == PPCREC_IML_TYPE_R_R)
+ {
+ if (operation == PPCREC_IML_OP_X86_CMP)
+ {
+ // both operands are read only
+ registersUsed->readGPR1 = op_r_r.regR;
+ registersUsed->readGPR2 = op_r_r.regA;
+ }
+ else if (
+ operation == PPCREC_IML_OP_ASSIGN ||
+ operation == PPCREC_IML_OP_ENDIAN_SWAP ||
+ operation == PPCREC_IML_OP_CNTLZW ||
+ operation == PPCREC_IML_OP_NOT ||
+ operation == PPCREC_IML_OP_NEG ||
+ operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32 ||
+ operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32)
+ {
+ // result is written, operand is read
+ registersUsed->writtenGPR1 = op_r_r.regR;
+ registersUsed->readGPR1 = op_r_r.regA;
+ }
+ else
+ cemu_assert_unimplemented();
+ }
+ else if (type == PPCREC_IML_TYPE_R_S32)
+ {
+ cemu_assert_debug(operation != PPCREC_IML_OP_ADD &&
+ operation != PPCREC_IML_OP_SUB &&
+ operation != PPCREC_IML_OP_AND &&
+ operation != PPCREC_IML_OP_OR &&
+ operation != PPCREC_IML_OP_XOR); // deprecated, use r_r_s32 for these
+
+ if (operation == PPCREC_IML_OP_LEFT_ROTATE)
+ {
+ // register operand is read and write
+ registersUsed->readGPR1 = op_r_immS32.regR;
+ registersUsed->writtenGPR1 = op_r_immS32.regR;
+ }
+ else if (operation == PPCREC_IML_OP_X86_CMP)
+ {
+ // register operand is read only
+ registersUsed->readGPR1 = op_r_immS32.regR;
+ }
+ else
+ {
+ // register operand is write only
+ // todo - use explicit lists, avoid default cases
+ registersUsed->writtenGPR1 = op_r_immS32.regR;
+ }
+ }
+ else if (type == PPCREC_IML_TYPE_R_R_S32)
+ {
+ registersUsed->writtenGPR1 = op_r_r_s32.regR;
+ registersUsed->readGPR1 = op_r_r_s32.regA;
+ }
+ else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY)
+ {
+ registersUsed->writtenGPR1 = op_r_r_s32_carry.regR;
+ registersUsed->readGPR1 = op_r_r_s32_carry.regA;
+ // some operations read carry
+ switch (operation)
+ {
+ case PPCREC_IML_OP_ADD_WITH_CARRY:
+ registersUsed->readGPR2 = op_r_r_s32_carry.regCarry;
+ break;
+ case PPCREC_IML_OP_ADD:
+ break;
+ default:
+ cemu_assert_unimplemented();
+ }
+ // carry is always written
+ registersUsed->writtenGPR2 = op_r_r_s32_carry.regCarry;
+ }
+ else if (type == PPCREC_IML_TYPE_R_R_R)
+ {
+ // in all cases result is written and other operands are read only
+ // with the exception of XOR, where if regA == regB then all bits are zeroed out. So we don't consider it a read
+ registersUsed->writtenGPR1 = op_r_r_r.regR;
+ if(!(operation == PPCREC_IML_OP_XOR && op_r_r_r.regA == op_r_r_r.regB))
+ {
+ registersUsed->readGPR1 = op_r_r_r.regA;
+ registersUsed->readGPR2 = op_r_r_r.regB;
+ }
+ }
+ else if (type == PPCREC_IML_TYPE_R_R_R_CARRY)
+ {
+ registersUsed->writtenGPR1 = op_r_r_r_carry.regR;
+ registersUsed->readGPR1 = op_r_r_r_carry.regA;
+ registersUsed->readGPR2 = op_r_r_r_carry.regB;
+ // some operations read carry
+ switch (operation)
+ {
+ case PPCREC_IML_OP_ADD_WITH_CARRY:
+ registersUsed->readGPR3 = op_r_r_r_carry.regCarry;
+ break;
+ case PPCREC_IML_OP_ADD:
+ break;
+ default:
+ cemu_assert_unimplemented();
+ }
+ // carry is always written
+ registersUsed->writtenGPR2 = op_r_r_r_carry.regCarry;
+ }
+ else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
+ {
+ // no effect on registers
+ }
+ else if (type == PPCREC_IML_TYPE_NO_OP)
+ {
+ // no effect on registers
+ }
+ else if (type == PPCREC_IML_TYPE_MACRO)
+ {
+ if (operation == PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_COUNT_CYCLES || operation == PPCREC_IML_MACRO_HLE)
+ {
+ // no effect on registers
+ }
+ else if (operation == PPCREC_IML_MACRO_B_TO_REG)
+ {
+ cemu_assert_debug(op_macro.paramReg.IsValid());
+ registersUsed->readGPR1 = op_macro.paramReg;
+ }
+ else
+ cemu_assert_unimplemented();
+ }
+ else if (type == PPCREC_IML_TYPE_COMPARE)
+ {
+ registersUsed->readGPR1 = op_compare.regA;
+ registersUsed->readGPR2 = op_compare.regB;
+ registersUsed->writtenGPR1 = op_compare.regR;
+ }
+ else if (type == PPCREC_IML_TYPE_COMPARE_S32)
+ {
+ registersUsed->readGPR1 = op_compare_s32.regA;
+ registersUsed->writtenGPR1 = op_compare_s32.regR;
+ }
+ else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
+ {
+ registersUsed->readGPR1 = op_conditional_jump.registerBool;
+ }
+ else if (type == PPCREC_IML_TYPE_JUMP)
+ {
+ // no registers affected
+ }
+ else if (type == PPCREC_IML_TYPE_LOAD)
+ {
+ registersUsed->writtenGPR1 = op_storeLoad.registerData;
+ if (op_storeLoad.registerMem.IsValid())
+ registersUsed->readGPR1 = op_storeLoad.registerMem;
+ }
+ else if (type == PPCREC_IML_TYPE_LOAD_INDEXED)
+ {
+ registersUsed->writtenGPR1 = op_storeLoad.registerData;
+ if (op_storeLoad.registerMem.IsValid())
+ registersUsed->readGPR1 = op_storeLoad.registerMem;
+ if (op_storeLoad.registerMem2.IsValid())
+ registersUsed->readGPR2 = op_storeLoad.registerMem2;
+ }
+ else if (type == PPCREC_IML_TYPE_STORE)
+ {
+ registersUsed->readGPR1 = op_storeLoad.registerData;
+ if (op_storeLoad.registerMem.IsValid())
+ registersUsed->readGPR2 = op_storeLoad.registerMem;
+ }
+ else if (type == PPCREC_IML_TYPE_STORE_INDEXED)
+ {
+ registersUsed->readGPR1 = op_storeLoad.registerData;
+ if (op_storeLoad.registerMem.IsValid())
+ registersUsed->readGPR2 = op_storeLoad.registerMem;
+ if (op_storeLoad.registerMem2.IsValid())
+ registersUsed->readGPR3 = op_storeLoad.registerMem2;
+ }
+ else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
+ {
+ registersUsed->readGPR1 = op_atomic_compare_store.regEA;
+ registersUsed->readGPR2 = op_atomic_compare_store.regCompareValue;
+ registersUsed->readGPR3 = op_atomic_compare_store.regWriteValue;
+ registersUsed->writtenGPR1 = op_atomic_compare_store.regBoolOut;
+ }
+ else if (type == PPCREC_IML_TYPE_CALL_IMM)
+ {
+ if (op_call_imm.regParam0.IsValid())
+ registersUsed->readGPR1 = op_call_imm.regParam0;
+ if (op_call_imm.regParam1.IsValid())
+ registersUsed->readGPR2 = op_call_imm.regParam1;
+ if (op_call_imm.regParam2.IsValid())
+ registersUsed->readGPR3 = op_call_imm.regParam2;
+ registersUsed->writtenGPR1 = op_call_imm.regReturn;
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_LOAD)
+ {
+ // fpr load operation
+ registersUsed->writtenGPR1 = op_storeLoad.registerData;
+ // address is in gpr register
+ if (op_storeLoad.registerMem.IsValid())
+ registersUsed->readGPR1 = op_storeLoad.registerMem;
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
+ {
+ // fpr load operation
+ registersUsed->writtenGPR1 = op_storeLoad.registerData;
+ // address is in gpr registers
+ if (op_storeLoad.registerMem.IsValid())
+ registersUsed->readGPR1 = op_storeLoad.registerMem;
+ if (op_storeLoad.registerMem2.IsValid())
+ registersUsed->readGPR2 = op_storeLoad.registerMem2;
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_STORE)
+ {
+ // fpr store operation
+ registersUsed->readGPR1 = op_storeLoad.registerData;
+ if (op_storeLoad.registerMem.IsValid())
+ registersUsed->readGPR2 = op_storeLoad.registerMem;
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
+ {
+ // fpr store operation
+ registersUsed->readGPR1 = op_storeLoad.registerData;
+ // address is in gpr registers
+ if (op_storeLoad.registerMem.IsValid())
+ registersUsed->readGPR2 = op_storeLoad.registerMem;
+ if (op_storeLoad.registerMem2.IsValid())
+ registersUsed->readGPR3 = op_storeLoad.registerMem2;
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_R_R)
+ {
+ // fpr operation
+ if (
+ operation == PPCREC_IML_OP_FPR_ASSIGN ||
+ operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64 ||
+ operation == PPCREC_IML_OP_FPR_FCTIWZ
+ )
+ {
+ registersUsed->readGPR1 = op_fpr_r_r.regA;
+ registersUsed->writtenGPR1 = op_fpr_r_r.regR;
+ }
+ else if (operation == PPCREC_IML_OP_FPR_MULTIPLY ||
+ operation == PPCREC_IML_OP_FPR_DIVIDE ||
+ operation == PPCREC_IML_OP_FPR_ADD ||
+ operation == PPCREC_IML_OP_FPR_SUB)
+ {
+ registersUsed->readGPR1 = op_fpr_r_r.regA;
+ registersUsed->readGPR2 = op_fpr_r_r.regR;
+ registersUsed->writtenGPR1 = op_fpr_r_r.regR;
+
+ }
+ else if (operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT ||
+ operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT ||
+ operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
+ {
+ registersUsed->writtenGPR1 = op_fpr_r_r.regR;
+ registersUsed->readGPR1 = op_fpr_r_r.regA;
+ }
+ else
+ cemu_assert_unimplemented();
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_R_R_R)
+ {
+ // fpr operation
+ registersUsed->readGPR1 = op_fpr_r_r_r.regA;
+ registersUsed->readGPR2 = op_fpr_r_r_r.regB;
+ registersUsed->writtenGPR1 = op_fpr_r_r_r.regR;
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R)
+ {
+ // fpr operation
+ registersUsed->readGPR1 = op_fpr_r_r_r_r.regA;
+ registersUsed->readGPR2 = op_fpr_r_r_r_r.regB;
+ registersUsed->readGPR3 = op_fpr_r_r_r_r.regC;
+ registersUsed->writtenGPR1 = op_fpr_r_r_r_r.regR;
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_R)
+ {
+ // fpr operation
+ if (operation == PPCREC_IML_OP_FPR_NEGATE ||
+ operation == PPCREC_IML_OP_FPR_ABS ||
+ operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS ||
+ operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64 ||
+ operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM)
+ {
+ registersUsed->readGPR1 = op_fpr_r.regR;
+ registersUsed->writtenGPR1 = op_fpr_r.regR;
+ }
+ else if (operation == PPCREC_IML_OP_FPR_LOAD_ONE)
+ {
+ registersUsed->writtenGPR1 = op_fpr_r.regR;
+ }
+ else
+ cemu_assert_unimplemented();
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_COMPARE)
+ {
+ registersUsed->writtenGPR1 = op_fpr_compare.regR;
+ registersUsed->readGPR1 = op_fpr_compare.regA;
+ registersUsed->readGPR2 = op_fpr_compare.regB;
+ }
+ else if (type == PPCREC_IML_TYPE_X86_EFLAGS_JCC)
+ {
+ // no registers read or written (except for the implicit eflags)
+ }
+ else
+ {
+ cemu_assert_unimplemented();
+ }
+}
+
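+// helper: return a copy of reg with its register ID replaced according to translationTable (invalid registers are passed through unchanged)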
+IMLReg replaceRegisterIdMultiple(IMLReg reg, const std::unordered_map<IMLRegID, IMLRegID>& translationTable)
+{
+ if (reg.IsInvalid())
+ return reg;
+ const auto& it = translationTable.find(reg.GetRegID());
+ cemu_assert_debug(it != translationTable.cend());
+ IMLReg alteredReg = reg;
+ alteredReg.SetRegID(it->second);
+ return alteredReg;
+}
+
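+// remap every register operand of this instruction according to translationTable (old register ID -> new register ID); every register the instruction uses must have an entry in the table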
+void IMLInstruction::RewriteGPR(const std::unordered_map<IMLRegID, IMLRegID>& translationTable)
+{
+ if (type == PPCREC_IML_TYPE_R_NAME)
+ {
+ op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_NAME_R)
+ {
+ op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_R_R)
+ {
+ op_r_r.regR = replaceRegisterIdMultiple(op_r_r.regR, translationTable);
+ op_r_r.regA = replaceRegisterIdMultiple(op_r_r.regA, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_R_S32)
+ {
+ op_r_immS32.regR = replaceRegisterIdMultiple(op_r_immS32.regR, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_R_R_S32)
+ {
+ op_r_r_s32.regR = replaceRegisterIdMultiple(op_r_r_s32.regR, translationTable);
+ op_r_r_s32.regA = replaceRegisterIdMultiple(op_r_r_s32.regA, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY)
+ {
+ op_r_r_s32_carry.regR = replaceRegisterIdMultiple(op_r_r_s32_carry.regR, translationTable);
+ op_r_r_s32_carry.regA = replaceRegisterIdMultiple(op_r_r_s32_carry.regA, translationTable);
+ op_r_r_s32_carry.regCarry = replaceRegisterIdMultiple(op_r_r_s32_carry.regCarry, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_R_R_R)
+ {
+ op_r_r_r.regR = replaceRegisterIdMultiple(op_r_r_r.regR, translationTable);
+ op_r_r_r.regA = replaceRegisterIdMultiple(op_r_r_r.regA, translationTable);
+ op_r_r_r.regB = replaceRegisterIdMultiple(op_r_r_r.regB, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_R_R_R_CARRY)
+ {
+ op_r_r_r_carry.regR = replaceRegisterIdMultiple(op_r_r_r_carry.regR, translationTable);
+ op_r_r_r_carry.regA = replaceRegisterIdMultiple(op_r_r_r_carry.regA, translationTable);
+ op_r_r_r_carry.regB = replaceRegisterIdMultiple(op_r_r_r_carry.regB, translationTable);
+ op_r_r_r_carry.regCarry = replaceRegisterIdMultiple(op_r_r_r_carry.regCarry, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_COMPARE)
+ {
+ op_compare.regR = replaceRegisterIdMultiple(op_compare.regR, translationTable);
+ op_compare.regA = replaceRegisterIdMultiple(op_compare.regA, translationTable);
+ op_compare.regB = replaceRegisterIdMultiple(op_compare.regB, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_COMPARE_S32)
+ {
+ op_compare_s32.regR = replaceRegisterIdMultiple(op_compare_s32.regR, translationTable);
+ op_compare_s32.regA = replaceRegisterIdMultiple(op_compare_s32.regA, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
+ {
+ op_conditional_jump.registerBool = replaceRegisterIdMultiple(op_conditional_jump.registerBool, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP)
+ {
+ // no effect on registers
+ }
+ else if (type == PPCREC_IML_TYPE_NO_OP)
+ {
+ // no effect on registers
+ }
+ else if (type == PPCREC_IML_TYPE_MACRO)
+ {
+ if (operation == PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_HLE || operation == PPCREC_IML_MACRO_COUNT_CYCLES)
+ {
+ // no effect on registers
+ }
+ else if (operation == PPCREC_IML_MACRO_B_TO_REG)
+ {
+ op_macro.paramReg = replaceRegisterIdMultiple(op_macro.paramReg, translationTable);
+ }
+ else
+ {
+ cemu_assert_unimplemented();
+ }
+ }
+ else if (type == PPCREC_IML_TYPE_LOAD)
+ {
+ op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
+ if (op_storeLoad.registerMem.IsValid())
+ {
+ op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
+ }
+ }
+ else if (type == PPCREC_IML_TYPE_LOAD_INDEXED)
+ {
+ op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
+ if (op_storeLoad.registerMem.IsValid())
+ op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
+ if (op_storeLoad.registerMem2.IsValid())
+ op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_STORE)
+ {
+ op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
+ if (op_storeLoad.registerMem.IsValid())
+ op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_STORE_INDEXED)
+ {
+ op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
+ if (op_storeLoad.registerMem.IsValid())
+ op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
+ if (op_storeLoad.registerMem2.IsValid())
+ op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
+ {
+ op_atomic_compare_store.regEA = replaceRegisterIdMultiple(op_atomic_compare_store.regEA, translationTable);
+ op_atomic_compare_store.regCompareValue = replaceRegisterIdMultiple(op_atomic_compare_store.regCompareValue, translationTable);
+ op_atomic_compare_store.regWriteValue = replaceRegisterIdMultiple(op_atomic_compare_store.regWriteValue, translationTable);
+ op_atomic_compare_store.regBoolOut = replaceRegisterIdMultiple(op_atomic_compare_store.regBoolOut, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_CALL_IMM)
+ {
+ op_call_imm.regReturn = replaceRegisterIdMultiple(op_call_imm.regReturn, translationTable);
+ if (op_call_imm.regParam0.IsValid())
+ op_call_imm.regParam0 = replaceRegisterIdMultiple(op_call_imm.regParam0, translationTable);
+ if (op_call_imm.regParam1.IsValid())
+ op_call_imm.regParam1 = replaceRegisterIdMultiple(op_call_imm.regParam1, translationTable);
+ if (op_call_imm.regParam2.IsValid())
+ op_call_imm.regParam2 = replaceRegisterIdMultiple(op_call_imm.regParam2, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_LOAD)
+ {
+ op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
+ op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
+ {
+ op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
+ op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
+ op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_STORE)
+ {
+ op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
+ op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
+ {
+ op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
+ op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
+ op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_R)
+ {
+ op_fpr_r.regR = replaceRegisterIdMultiple(op_fpr_r.regR, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_R_R)
+ {
+ op_fpr_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r.regR, translationTable);
+ op_fpr_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r.regA, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_R_R_R)
+ {
+ op_fpr_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r.regR, translationTable);
+ op_fpr_r_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r_r.regA, translationTable);
+ op_fpr_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r.regB, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R)
+ {
+ op_fpr_r_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regR, translationTable);
+ op_fpr_r_r_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regA, translationTable);
+ op_fpr_r_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regB, translationTable);
+ op_fpr_r_r_r_r.regC = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regC, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_FPR_COMPARE)
+ {
+ op_fpr_compare.regA = replaceRegisterIdMultiple(op_fpr_compare.regA, translationTable);
+ op_fpr_compare.regB = replaceRegisterIdMultiple(op_fpr_compare.regB, translationTable);
+ op_fpr_compare.regR = replaceRegisterIdMultiple(op_fpr_compare.regR, translationTable);
+ }
+ else if (type == PPCREC_IML_TYPE_X86_EFLAGS_JCC)
+ {
+ // no registers read or written (except for the implicit eflags)
+ }
+ else
+ {
+ cemu_assert_unimplemented();
+ }
+}
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h
new file mode 100644
index 00000000..4df2a666
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h
@@ -0,0 +1,826 @@
+#pragma once
+
+using IMLRegID = uint16; // 16 bit ID
+using IMLPhysReg = sint32; // arbitrary value that is up to the architecture backend, usually this will be the register index. A value of -1 is reserved and means not assigned
+
+// format of IMLReg:
+// 0-15 (16 bit) IMLRegID
+// 19-23 (5 bit) Offset In elements, for SIMD registers
+// 24-27 (4 bit) IMLRegFormat RegFormat
+// 28-31 (4 bit) IMLRegFormat BaseFormat
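+// e.g. IMLReg(IMLRegFormat::I64, IMLRegFormat::I32, 0, 5) encodes a 32-bit view of virtual register 5 (BaseFormat=I64, RegFormat=I32, id=5)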
+
+enum class IMLRegFormat : uint8
+{
+ INVALID_FORMAT,
+ I64,
+ I32,
+ I16,
+ I8,
+ // I1 ?
+ F64,
+ F32,
+ TYPE_COUNT,
+};
+
+class IMLReg
+{
+public:
+ IMLReg()
+ {
+ m_raw = 0; // 0 is invalid
+ }
+
+ IMLReg(IMLRegFormat baseRegFormat, IMLRegFormat regFormat, uint8 viewOffset, IMLRegID regId)
+ {
+ m_raw = 0;
+ m_raw |= ((uint8)baseRegFormat << 28);
+ m_raw |= ((uint8)regFormat << 24);
+ m_raw |= (uint32)regId;
+ }
+
+ IMLReg(IMLReg&& baseReg, IMLRegFormat viewFormat, uint8 viewOffset, IMLRegID regId)
+ {
+ DEBUG_BREAK;
+ //m_raw = 0;
+ //m_raw |= ((uint8)baseRegFormat << 28);
+ //m_raw |= ((uint8)viewFormat << 24);
+ //m_raw |= (uint32)regId;
+ }
+
+ IMLReg(const IMLReg& other) : m_raw(other.m_raw) {}
+
+ IMLRegFormat GetBaseFormat() const
+ {
+ return (IMLRegFormat)((m_raw >> 28) & 0xF);
+ }
+
+ IMLRegFormat GetRegFormat() const
+ {
+ return (IMLRegFormat)((m_raw >> 24) & 0xF);
+ }
+
+ IMLRegID GetRegID() const
+ {
+ cemu_assert_debug(GetBaseFormat() != IMLRegFormat::INVALID_FORMAT);
+ cemu_assert_debug(GetRegFormat() != IMLRegFormat::INVALID_FORMAT);
+ return (IMLRegID)(m_raw & 0xFFFF);
+ }
+
+ void SetRegID(IMLRegID regId)
+ {
+ cemu_assert_debug(regId <= 0xFFFF);
+ m_raw &= ~0xFFFF;
+ m_raw |= (uint32)regId;
+ }
+
+ bool IsInvalid() const
+ {
+ return GetBaseFormat() == IMLRegFormat::INVALID_FORMAT;
+ }
+
+ bool IsValid() const
+ {
+ return GetBaseFormat() != IMLRegFormat::INVALID_FORMAT;
+ }
+
+ bool IsValidAndSameRegID(IMLRegID regId) const
+ {
+ return IsValid() && GetRegID() == regId;
+ }
+
+ // compare all fields
+ bool operator==(const IMLReg& other) const
+ {
+ return m_raw == other.m_raw;
+ }
+
+private:
+ uint32 m_raw;
+};
+
+static const IMLReg IMLREG_INVALID(IMLRegFormat::INVALID_FORMAT, IMLRegFormat::INVALID_FORMAT, 0, 0);
+static const IMLRegID IMLRegID_INVALID(0xFFFF);
+
+using IMLName = uint32;
+
+enum
+{
+ PPCREC_IML_OP_ASSIGN, // '=' operator
+ PPCREC_IML_OP_ENDIAN_SWAP, // '=' operator with 32bit endian swap
+ PPCREC_IML_OP_MULTIPLY_SIGNED, // '*' operator (signed multiply)
+ PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, // unsigned 64bit multiply, store only high 32bit-word of result
+ PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, // signed 64bit multiply, store only high 32bit-word of result
+ PPCREC_IML_OP_DIVIDE_SIGNED, // '/' operator (signed divide)
+ PPCREC_IML_OP_DIVIDE_UNSIGNED, // '/' operator (unsigned divide)
+
+ // binary operation
+ PPCREC_IML_OP_OR, // '|' operator
+ PPCREC_IML_OP_AND, // '&' operator
+ PPCREC_IML_OP_XOR, // '^' operator
+ PPCREC_IML_OP_LEFT_ROTATE, // left rotate operator
+ PPCREC_IML_OP_LEFT_SHIFT, // shift left operator
+ PPCREC_IML_OP_RIGHT_SHIFT_U, // right shift operator (unsigned)
+ PPCREC_IML_OP_RIGHT_SHIFT_S, // right shift operator (signed)
+ // ppc
+ PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits)
+ PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits)
+ PPCREC_IML_OP_CNTLZW,
+ // FPU
+ PPCREC_IML_OP_FPR_ASSIGN,
+ PPCREC_IML_OP_FPR_LOAD_ONE, // load constant 1.0 into register
+ PPCREC_IML_OP_FPR_ADD,
+ PPCREC_IML_OP_FPR_SUB,
+ PPCREC_IML_OP_FPR_MULTIPLY,
+ PPCREC_IML_OP_FPR_DIVIDE,
+ PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, // expand f32 to f64 in-place
+ PPCREC_IML_OP_FPR_NEGATE,
+ PPCREC_IML_OP_FPR_ABS, // abs(fpr)
+ PPCREC_IML_OP_FPR_NEGATIVE_ABS, // -abs(fpr)
+ PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, // round 64bit double to 64bit double with 32bit float precision (in bottom half of xmm register)
+ PPCREC_IML_OP_FPR_FCTIWZ,
+ PPCREC_IML_OP_FPR_SELECT, // selectively copy bottom value from operand B or C based on value in operand A
+ // Conversion (FPR_R_R)
+ PPCREC_IML_OP_FPR_INT_TO_FLOAT, // convert integer value in gpr to floating point value in fpr
+ PPCREC_IML_OP_FPR_FLOAT_TO_INT, // convert floating point value in fpr to integer value in gpr
+
+ // Bitcast (FPR_R_R)
+ PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT,
+
+ // R_R_R + R_R_S32
+ PPCREC_IML_OP_ADD, // also R_R_R_CARRY
+ PPCREC_IML_OP_SUB,
+
+ // R_R only
+ PPCREC_IML_OP_NOT,
+ PPCREC_IML_OP_NEG,
+ PPCREC_IML_OP_ASSIGN_S16_TO_S32,
+ PPCREC_IML_OP_ASSIGN_S8_TO_S32,
+
+ // R_R_R_carry
+ PPCREC_IML_OP_ADD_WITH_CARRY, // similar to ADD but also adds carry bit (0 or 1)
+
+ // X86 extension
+ PPCREC_IML_OP_X86_CMP, // R_R and R_S32
+
+ PPCREC_IML_OP_INVALID
+};
+
+#define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN)
+
+enum
+{
+ PPCREC_IML_MACRO_B_TO_REG, // branch to PPC address in register (used for BCCTR, BCLR)
+
+ PPCREC_IML_MACRO_BL, // call to different function (can be within same function)
+ PPCREC_IML_MACRO_B_FAR, // branch to different function
+ PPCREC_IML_MACRO_COUNT_CYCLES, // decrease current remaining thread cycles by a certain amount
+ PPCREC_IML_MACRO_HLE, // HLE function call
+ PPCREC_IML_MACRO_LEAVE, // leaves recompiler and switches to interpreter
+ // debugging
+ PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak
+};
+
+enum class IMLCondition : uint8
+{
+ EQ,
+ NEQ,
+ SIGNED_GT,
+ SIGNED_LT,
+ UNSIGNED_GT,
+ UNSIGNED_LT,
+
+ // floating point conditions
+ UNORDERED_GT, // a > b, false if either is NaN
+ UNORDERED_LT, // a < b, false if either is NaN
+ UNORDERED_EQ, // a == b, false if either is NaN
+ UNORDERED_U, // unordered (true if either operand is NaN)
+
+ ORDERED_GT,
+ ORDERED_LT,
+ ORDERED_EQ,
+ ORDERED_U
+};
+
+enum
+{
+ PPCREC_IML_TYPE_NONE,
+ PPCREC_IML_TYPE_NO_OP, // no-op instruction
+ PPCREC_IML_TYPE_R_R, // r* = (op) *r (can also be r* (op) *r)
+ PPCREC_IML_TYPE_R_R_R, // r* = r* (op) r*
+ PPCREC_IML_TYPE_R_R_R_CARRY, // r* = r* (op) r* (reads and/or updates carry)
+ PPCREC_IML_TYPE_R_R_S32, // r* = r* (op) s32*
+ PPCREC_IML_TYPE_R_R_S32_CARRY, // r* = r* (op) s32* (reads and/or updates carry)
+ PPCREC_IML_TYPE_LOAD, // r* = [r*+s32*]
+ PPCREC_IML_TYPE_LOAD_INDEXED, // r* = [r*+r*]
+ PPCREC_IML_TYPE_STORE, // [r*+s32*] = r*
+ PPCREC_IML_TYPE_STORE_INDEXED, // [r*+r*] = r*
+ PPCREC_IML_TYPE_R_NAME, // r* = name
+ PPCREC_IML_TYPE_NAME_R, // name* = r*
+ PPCREC_IML_TYPE_R_S32, // r* (op) imm
+ PPCREC_IML_TYPE_MACRO,
+ PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles < 0
+
+ // conditions and branches
+ PPCREC_IML_TYPE_COMPARE, // r* = r* CMP[cond] r*
+ PPCREC_IML_TYPE_COMPARE_S32, // r* = r* CMP[cond] imm
+ PPCREC_IML_TYPE_JUMP, // jump always
+ PPCREC_IML_TYPE_CONDITIONAL_JUMP, // jump conditionally based on boolean value in register
+
+ // atomic
+ PPCREC_IML_TYPE_ATOMIC_CMP_STORE,
+
+ // function call
+ PPCREC_IML_TYPE_CALL_IMM, // call to fixed immediate address
+
+ // FPR
+ PPCREC_IML_TYPE_FPR_LOAD, // r* = (bitdepth) [r*+s32*] (single or paired single mode)
+ PPCREC_IML_TYPE_FPR_LOAD_INDEXED, // r* = (bitdepth) [r*+r*] (single or paired single mode)
+ PPCREC_IML_TYPE_FPR_STORE, // (bitdepth) [r*+s32*] = r* (single or paired single mode)
+ PPCREC_IML_TYPE_FPR_STORE_INDEXED, // (bitdepth) [r*+r*] = r* (single or paired single mode)
+ PPCREC_IML_TYPE_FPR_R_R,
+ PPCREC_IML_TYPE_FPR_R_R_R,
+ PPCREC_IML_TYPE_FPR_R_R_R_R,
+ PPCREC_IML_TYPE_FPR_R,
+
+ PPCREC_IML_TYPE_FPR_COMPARE, // r* = r* CMP[cond] r*
+
+ // X86 specific
+ PPCREC_IML_TYPE_X86_EFLAGS_JCC,
+};
+
+enum // IMLName
+{
+ PPCREC_NAME_NONE,
+ PPCREC_NAME_TEMPORARY = 1000,
+ PPCREC_NAME_R0 = 2000,
+ PPCREC_NAME_SPR0 = 3000,
+ PPCREC_NAME_FPR_HALF = 4800, // Counts PS0 and PS1 separately. E.g. fp3.ps1 is at offset 3 * 2 + 1
+ PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7
+ PPCREC_NAME_XER_CA = 6000, // carry bit from XER
+ PPCREC_NAME_XER_OV = 6001, // overflow bit from XER
+ PPCREC_NAME_XER_SO = 6002, // summary overflow bit from XER
+ PPCREC_NAME_CR = 7000, // CR register bits (31 to 0)
+ PPCREC_NAME_CR_LAST = PPCREC_NAME_CR+31,
+ PPCREC_NAME_CPU_MEMRES_EA = 8000,
+ PPCREC_NAME_CPU_MEMRES_VAL = 8001
+};
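+// IMLName values are base offsets, e.g. PPCREC_NAME_R0 + i refers to PPC GPR i and PPCREC_NAME_SPR0 + n refers to SPR n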
+
+#define PPC_REC_INVALID_REGISTER 0xFF // deprecated. Use IMLREG_INVALID instead
+
+enum
+{
+ // fpr load
+ PPCREC_FPR_LD_MODE_SINGLE,
+ PPCREC_FPR_LD_MODE_DOUBLE,
+
+ // fpr store
+ PPCREC_FPR_ST_MODE_SINGLE,
+ PPCREC_FPR_ST_MODE_DOUBLE,
+
+ PPCREC_FPR_ST_MODE_UI32_FROM_PS0, // store raw low-32bit of PS0
+};
+
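+// set of registers read and written by a single IML instruction, filled in by IMLInstruction::CheckRegisterUsage()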
+struct IMLUsedRegisters
+{
+ IMLUsedRegisters() {};
+
+ bool IsWrittenByRegId(IMLRegID regId) const
+ {
+ if (writtenGPR1.IsValid() && writtenGPR1.GetRegID() == regId)
+ return true;
+ if (writtenGPR2.IsValid() && writtenGPR2.GetRegID() == regId)
+ return true;
+ return false;
+ }
+
+ bool IsBaseGPRWritten(IMLReg imlReg) const
+ {
+ cemu_assert_debug(imlReg.IsValid());
+ auto regId = imlReg.GetRegID();
+ return IsWrittenByRegId(regId);
+ }
+
+ template<typename Fn>
+ void ForEachWrittenGPR(Fn F) const
+ {
+ if (writtenGPR1.IsValid())
+ F(writtenGPR1);
+ if (writtenGPR2.IsValid())
+ F(writtenGPR2);
+ }
+
+ template<typename Fn>
+ void ForEachReadGPR(Fn F) const
+ {
+ if (readGPR1.IsValid())
+ F(readGPR1);
+ if (readGPR2.IsValid())
+ F(readGPR2);
+ if (readGPR3.IsValid())
+ F(readGPR3);
+ if (readGPR4.IsValid())
+ F(readGPR4);
+ }
+
+ template<typename Fn>
+ void ForEachAccessedGPR(Fn F) const
+ {
+ // GPRs
+ if (readGPR1.IsValid())
+ F(readGPR1, false);
+ if (readGPR2.IsValid())
+ F(readGPR2, false);
+ if (readGPR3.IsValid())
+ F(readGPR3, false);
+ if (readGPR4.IsValid())
+ F(readGPR4, false);
+ if (writtenGPR1.IsValid())
+ F(writtenGPR1, true);
+ if (writtenGPR2.IsValid())
+ F(writtenGPR2, true);
+ }
+
+ IMLReg readGPR1;
+ IMLReg readGPR2;
+ IMLReg readGPR3;
+ IMLReg readGPR4;
+ IMLReg writtenGPR1;
+ IMLReg writtenGPR2;
+};
+
+struct IMLInstruction
+{
+ IMLInstruction() {}
+ IMLInstruction(const IMLInstruction& other)
+ {
+ memcpy(this, &other, sizeof(IMLInstruction));
+ }
+
+ uint8 type;
+ uint8 operation;
+ union
+ {
+ struct
+ {
+ uint8 _padding[7];
+ }padding;
+ struct
+ {
+ IMLReg regR;
+ IMLReg regA;
+ }op_r_r;
+ struct
+ {
+ IMLReg regR;
+ IMLReg regA;
+ IMLReg regB;
+ }op_r_r_r;
+ struct
+ {
+ IMLReg regR;
+ IMLReg regA;
+ IMLReg regB;
+ IMLReg regCarry;
+ }op_r_r_r_carry;
+ struct
+ {
+ IMLReg regR;
+ IMLReg regA;
+ sint32 immS32;
+ }op_r_r_s32;
+ struct
+ {
+ IMLReg regR;
+ IMLReg regA;
+ IMLReg regCarry;
+ sint32 immS32;
+ }op_r_r_s32_carry;
+ struct
+ {
+ IMLReg regR;
+ IMLName name;
+ }op_r_name; // alias op_name_r
+ struct
+ {
+ IMLReg regR;
+ sint32 immS32;
+ }op_r_immS32;
+ struct
+ {
+ uint32 param;
+ uint32 param2;
+ uint16 paramU16;
+ IMLReg paramReg;
+ }op_macro;
+ struct
+ {
+ IMLReg registerData;
+ IMLReg registerMem;
+ IMLReg registerMem2;
+ uint8 copyWidth;
+ struct
+ {
+ bool swapEndian : 1;
+ bool signExtend : 1;
+ bool notExpanded : 1; // for floats
+ }flags2;
+ uint8 mode; // transfer mode
+ sint32 immS32;
+ }op_storeLoad;
+ struct
+ {
+ uintptr_t callAddress;
+ IMLReg regParam0;
+ IMLReg regParam1;
+ IMLReg regParam2;
+ IMLReg regReturn;
+ }op_call_imm;
+ struct
+ {
+ IMLReg regR;
+ IMLReg regA;
+ }op_fpr_r_r;
+ struct
+ {
+ IMLReg regR;
+ IMLReg regA;
+ IMLReg regB;
+ }op_fpr_r_r_r;
+ struct
+ {
+ IMLReg regR;
+ IMLReg regA;
+ IMLReg regB;
+ IMLReg regC;
+ }op_fpr_r_r_r_r;
+ struct
+ {
+ IMLReg regR;
+ }op_fpr_r;
+ struct
+ {
+ IMLReg regR; // stores the boolean result of the comparison
+ IMLReg regA;
+ IMLReg regB;
+ IMLCondition cond;
+ }op_fpr_compare;
+ struct
+ {
+ IMLReg regR; // stores the boolean result of the comparison
+ IMLReg regA;
+ IMLReg regB;
+ IMLCondition cond;
+ }op_compare;
+ struct
+ {
+ IMLReg regR; // stores the boolean result of the comparison
+ IMLReg regA;
+ sint32 immS32;
+ IMLCondition cond;
+ }op_compare_s32;
+ struct
+ {
+ IMLReg registerBool;
+ bool mustBeTrue;
+ }op_conditional_jump;
+ struct
+ {
+ IMLReg regEA;
+ IMLReg regCompareValue;
+ IMLReg regWriteValue;
+ IMLReg regBoolOut;
+ }op_atomic_compare_store;
+ // conditional operations (emitted if supported by target platform)
+ struct
+ {
+ // r_s32
+ IMLReg regR;
+ sint32 immS32;
+ // condition
+ uint8 crRegisterIndex;
+ uint8 crBitIndex;
+ bool bitMustBeSet;
+ }op_conditional_r_s32;
+ // X86 specific
+ struct
+ {
+ IMLCondition cond;
+ bool invertedCondition;
+ }op_x86_eflags_jcc;
+ };
+
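+ // a suffix instruction ends a segment (branches, jumps and exit macros); it must always be the last instruction of its segment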
+ bool IsSuffixInstruction() const
+ {
+ if (type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_BL ||
+ type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_B_FAR ||
+ type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_B_TO_REG ||
+ type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_LEAVE ||
+ type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_HLE ||
+ type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ||
+ type == PPCREC_IML_TYPE_JUMP ||
+ type == PPCREC_IML_TYPE_CONDITIONAL_JUMP ||
+ type == PPCREC_IML_TYPE_X86_EFLAGS_JCC)
+ return true;
+ return false;
+ }
+
+ // instruction setters
+ void make_no_op()
+ {
+ type = PPCREC_IML_TYPE_NO_OP;
+ operation = 0;
+ }
+
+ void make_r_name(IMLReg regR, IMLName name)
+ {
+ cemu_assert_debug(regR.GetBaseFormat() == regR.GetRegFormat()); // for name load/store instructions the register must match the base format
+ type = PPCREC_IML_TYPE_R_NAME;
+ operation = PPCREC_IML_OP_ASSIGN;
+ op_r_name.regR = regR;
+ op_r_name.name = name;
+ }
+
+ void make_name_r(IMLName name, IMLReg regR)
+ {
+ cemu_assert_debug(regR.GetBaseFormat() == regR.GetRegFormat()); // for name load/store instructions the register must match the base format
+ type = PPCREC_IML_TYPE_NAME_R;
+ operation = PPCREC_IML_OP_ASSIGN;
+ op_r_name.regR = regR;
+ op_r_name.name = name;
+ }
+
+ void make_debugbreak(uint32 currentPPCAddress = 0)
+ {
+ make_macro(PPCREC_IML_MACRO_DEBUGBREAK, 0, currentPPCAddress, 0, IMLREG_INVALID);
+ }
+
+ void make_macro(uint32 macroId, uint32 param, uint32 param2, uint16 paramU16, IMLReg regParam)
+ {
+ this->type = PPCREC_IML_TYPE_MACRO;
+ this->operation = macroId;
+ this->op_macro.param = param;
+ this->op_macro.param2 = param2;
+ this->op_macro.paramU16 = paramU16;
+ this->op_macro.paramReg = regParam;
+ }
+
+ void make_cjump_cycle_check()
+ {
+ this->type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK;
+ this->operation = 0;
+ }
+
+ void make_r_r(uint32 operation, IMLReg regR, IMLReg regA)
+ {
+ this->type = PPCREC_IML_TYPE_R_R;
+ this->operation = operation;
+ this->op_r_r.regR = regR;
+ this->op_r_r.regA = regA;
+ }
+
+ void make_r_s32(uint32 operation, IMLReg regR, sint32 immS32)
+ {
+ this->type = PPCREC_IML_TYPE_R_S32;
+ this->operation = operation;
+ this->op_r_immS32.regR = regR;
+ this->op_r_immS32.immS32 = immS32;
+ }
+
+ void make_r_r_r(uint32 operation, IMLReg regR, IMLReg regA, IMLReg regB)
+ {
+ this->type = PPCREC_IML_TYPE_R_R_R;
+ this->operation = operation;
+ this->op_r_r_r.regR = regR;
+ this->op_r_r_r.regA = regA;
+ this->op_r_r_r.regB = regB;
+ }
+
+ void make_r_r_r_carry(uint32 operation, IMLReg regR, IMLReg regA, IMLReg regB, IMLReg regCarry)
+ {
+ this->type = PPCREC_IML_TYPE_R_R_R_CARRY;
+ this->operation = operation;
+ this->op_r_r_r_carry.regR = regR;
+ this->op_r_r_r_carry.regA = regA;
+ this->op_r_r_r_carry.regB = regB;
+ this->op_r_r_r_carry.regCarry = regCarry;
+ }
+
+ void make_r_r_s32(uint32 operation, IMLReg regR, IMLReg regA, sint32 immS32)
+ {
+ this->type = PPCREC_IML_TYPE_R_R_S32;
+ this->operation = operation;
+ this->op_r_r_s32.regR = regR;
+ this->op_r_r_s32.regA = regA;
+ this->op_r_r_s32.immS32 = immS32;
+ }
+
+ void make_r_r_s32_carry(uint32 operation, IMLReg regR, IMLReg regA, sint32 immS32, IMLReg regCarry)
+ {
+ this->type = PPCREC_IML_TYPE_R_R_S32_CARRY;
+ this->operation = operation;
+ this->op_r_r_s32_carry.regR = regR;
+ this->op_r_r_s32_carry.regA = regA;
+ this->op_r_r_s32_carry.immS32 = immS32;
+ this->op_r_r_s32_carry.regCarry = regCarry;
+ }
+
+ void make_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond)
+ {
+ this->type = PPCREC_IML_TYPE_COMPARE;
+ this->operation = PPCREC_IML_OP_INVALID;
+ this->op_compare.regR = regR;
+ this->op_compare.regA = regA;
+ this->op_compare.regB = regB;
+ this->op_compare.cond = cond;
+ }
+
+ void make_compare_s32(IMLReg regA, sint32 immS32, IMLReg regR, IMLCondition cond)
+ {
+ this->type = PPCREC_IML_TYPE_COMPARE_S32;
+ this->operation = PPCREC_IML_OP_INVALID;
+ this->op_compare_s32.regR = regR;
+ this->op_compare_s32.regA = regA;
+ this->op_compare_s32.immS32 = immS32;
+ this->op_compare_s32.cond = cond;
+ }
+
+ void make_conditional_jump(IMLReg regBool, bool mustBeTrue)
+ {
+ this->type = PPCREC_IML_TYPE_CONDITIONAL_JUMP;
+ this->operation = PPCREC_IML_OP_INVALID;
+ this->op_conditional_jump.registerBool = regBool;
+ this->op_conditional_jump.mustBeTrue = mustBeTrue;
+ }
+
+ void make_jump()
+ {
+ this->type = PPCREC_IML_TYPE_JUMP;
+ this->operation = PPCREC_IML_OP_INVALID;
+ }
+
+ // load from memory
+ void make_r_memory(IMLReg regD, IMLReg regMem, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian)
+ {
+ this->type = PPCREC_IML_TYPE_LOAD;
+ this->operation = 0;
+ this->op_storeLoad.registerData = regD;
+ this->op_storeLoad.registerMem = regMem;
+ this->op_storeLoad.immS32 = immS32;
+ this->op_storeLoad.copyWidth = copyWidth;
+ this->op_storeLoad.flags2.swapEndian = switchEndian;
+ this->op_storeLoad.flags2.signExtend = signExtend;
+ }
+
+ // store to memory
+ void make_memory_r(IMLReg regS, IMLReg regMem, sint32 immS32, uint32 copyWidth, bool switchEndian)
+ {
+ this->type = PPCREC_IML_TYPE_STORE;
+ this->operation = 0;
+ this->op_storeLoad.registerData = regS;
+ this->op_storeLoad.registerMem = regMem;
+ this->op_storeLoad.immS32 = immS32;
+ this->op_storeLoad.copyWidth = copyWidth;
+ this->op_storeLoad.flags2.swapEndian = switchEndian;
+ this->op_storeLoad.flags2.signExtend = false;
+ }
+
+ void make_atomic_cmp_store(IMLReg regEA, IMLReg regCompareValue, IMLReg regWriteValue, IMLReg regSuccessOutput)
+ {
+ this->type = PPCREC_IML_TYPE_ATOMIC_CMP_STORE;
+ this->operation = 0;
+ this->op_atomic_compare_store.regEA = regEA;
+ this->op_atomic_compare_store.regCompareValue = regCompareValue;
+ this->op_atomic_compare_store.regWriteValue = regWriteValue;
+ this->op_atomic_compare_store.regBoolOut = regSuccessOutput;
+ }
+
+ void make_call_imm(uintptr_t callAddress, IMLReg param0, IMLReg param1, IMLReg param2, IMLReg regReturn)
+ {
+ this->type = PPCREC_IML_TYPE_CALL_IMM;
+ this->operation = 0;
+ this->op_call_imm.callAddress = callAddress;
+ this->op_call_imm.regParam0 = param0;
+ this->op_call_imm.regParam1 = param1;
+ this->op_call_imm.regParam2 = param2;
+ this->op_call_imm.regReturn = regReturn;
+ }
+
+ // FPR
+
+ // load from memory
+ void make_fpr_r_memory(IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian)
+ {
+ this->type = PPCREC_IML_TYPE_FPR_LOAD;
+ this->operation = 0;
+ this->op_storeLoad.registerData = registerDestination;
+ this->op_storeLoad.registerMem = registerMemory;
+ this->op_storeLoad.immS32 = immS32;
+ this->op_storeLoad.mode = mode;
+ this->op_storeLoad.flags2.swapEndian = switchEndian;
+ }
+
+ void make_fpr_r_memory_indexed(IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 mode, bool switchEndian)
+ {
+ this->type = PPCREC_IML_TYPE_FPR_LOAD_INDEXED;
+ this->operation = 0;
+ this->op_storeLoad.registerData = registerDestination;
+ this->op_storeLoad.registerMem = registerMemory1;
+ this->op_storeLoad.registerMem2 = registerMemory2;
+ this->op_storeLoad.immS32 = 0;
+ this->op_storeLoad.mode = mode;
+ this->op_storeLoad.flags2.swapEndian = switchEndian;
+ }
+
+ // store to memory
+ void make_fpr_memory_r(IMLReg registerSource, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian)
+ {
+ this->type = PPCREC_IML_TYPE_FPR_STORE;
+ this->operation = 0;
+ this->op_storeLoad.registerData = registerSource;
+ this->op_storeLoad.registerMem = registerMemory;
+ this->op_storeLoad.immS32 = immS32;
+ this->op_storeLoad.mode = mode;
+ this->op_storeLoad.flags2.swapEndian = switchEndian;
+ }
+
+ void make_fpr_memory_r_indexed(IMLReg registerSource, IMLReg registerMemory1, IMLReg registerMemory2, sint32 immS32, uint32 mode, bool switchEndian)
+ {
+ this->type = PPCREC_IML_TYPE_FPR_STORE_INDEXED;
+ this->operation = 0;
+ this->op_storeLoad.registerData = registerSource;
+ this->op_storeLoad.registerMem = registerMemory1;
+ this->op_storeLoad.registerMem2 = registerMemory2;
+ this->op_storeLoad.immS32 = immS32;
+ this->op_storeLoad.mode = mode;
+ this->op_storeLoad.flags2.swapEndian = switchEndian;
+ }
+
+ void make_fpr_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond)
+ {
+ this->type = PPCREC_IML_TYPE_FPR_COMPARE;
+ this->operation = -999;
+ this->op_fpr_compare.regR = regR;
+ this->op_fpr_compare.regA = regA;
+ this->op_fpr_compare.regB = regB;
+ this->op_fpr_compare.cond = cond;
+ }
+
+ void make_fpr_r(sint32 operation, IMLReg registerResult)
+ {
+ // OP (fpr)
+ this->type = PPCREC_IML_TYPE_FPR_R;
+ this->operation = operation;
+ this->op_fpr_r.regR = registerResult;
+ }
+
+ void make_fpr_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperand, sint32 crRegister=PPC_REC_INVALID_REGISTER)
+ {
+ // fpr OP fpr
+ this->type = PPCREC_IML_TYPE_FPR_R_R;
+ this->operation = operation;
+ this->op_fpr_r_r.regR = registerResult;
+ this->op_fpr_r_r.regA = registerOperand;
+ }
+
+ void make_fpr_r_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperand1, IMLReg registerOperand2, sint32 crRegister=PPC_REC_INVALID_REGISTER)
+ {
+ // fpr = OP (fpr,fpr)
+ this->type = PPCREC_IML_TYPE_FPR_R_R_R;
+ this->operation = operation;
+ this->op_fpr_r_r_r.regR = registerResult;
+ this->op_fpr_r_r_r.regA = registerOperand1;
+ this->op_fpr_r_r_r.regB = registerOperand2;
+ }
+
+ void make_fpr_r_r_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperandA, IMLReg registerOperandB, IMLReg registerOperandC, sint32 crRegister=PPC_REC_INVALID_REGISTER)
+ {
+ // fpr = OP (fpr,fpr,fpr)
+ this->type = PPCREC_IML_TYPE_FPR_R_R_R_R;
+ this->operation = operation;
+ this->op_fpr_r_r_r_r.regR = registerResult;
+ this->op_fpr_r_r_r_r.regA = registerOperandA;
+ this->op_fpr_r_r_r_r.regB = registerOperandB;
+ this->op_fpr_r_r_r_r.regC = registerOperandC;
+ }
+
+ /* X86 specific */
+ void make_x86_eflags_jcc(IMLCondition cond, bool invertedCondition)
+ {
+ this->type = PPCREC_IML_TYPE_X86_EFLAGS_JCC;
+ this->operation = -999;
+ this->op_x86_eflags_jcc.cond = cond;
+ this->op_x86_eflags_jcc.invertedCondition = invertedCondition;
+ }
+
+ void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const;
+ bool HasSideEffects() const; // returns true if the instruction has side effects beyond just reading and writing registers. Dead code elimination uses this to know if an instruction can be dropped when the regular register outputs are not used
+
+ void RewriteGPR(const std::unordered_map<IMLRegID, IMLRegID>& translationTable);
+};
+
+// architecture specific constants
+namespace IMLArchX86
+{
+ static constexpr int PHYSREG_GPR_BASE = 0;
+ static constexpr int PHYSREG_FPR_BASE = 16;
+};
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp
new file mode 100644
index 00000000..7671a163
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp
@@ -0,0 +1,719 @@
+#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
+#include "Cafe/HW/Espresso/Recompiler/IML/IML.h"
+#include "Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h"
+
+#include "../PPCRecompiler.h"
+#include "../PPCRecompilerIml.h"
+#include "../BackendX64/BackendX64.h"
+
+#include "Common/FileStream.h"
+
+#include <boost/container/static_vector.hpp> // boost::container::static_vector is used for the fixed-capacity scratch lists below
+#include <span> // std::span is used by IMLOptimizerRegIOAnalysis
+
+IMLReg _FPRRegFromID(IMLRegID regId)
+{
+ return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, regId);
+}
+
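+// scan forward from a single-precision FPR load; if the value is only copied and stored back as single precision, the float->double expansion can be deferred (see pattern description below)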
+void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg fprReg)
+{
+ IMLRegID fprIndex = fprReg.GetRegID();
+
+ IMLInstruction* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad;
+ if (imlInstructionLoad->op_storeLoad.flags2.notExpanded)
+ return;
+ boost::container::static_vector<sint32, 4> trackedMoves; // only track up to 4 copies
+ IMLUsedRegisters registersUsed;
+ sint32 scanRangeEnd = std::min<sint32>(imlIndexLoad + 25, (sint32)imlSegment->imlList.size()); // don't scan too far (scanning costs time and the chance of merging the load+store drops with distance)
+ bool foundMatch = false;
+ sint32 lastStore = -1;
+ for (sint32 i = imlIndexLoad + 1; i < scanRangeEnd; i++)
+ {
+ IMLInstruction* imlInstruction = imlSegment->imlList.data() + i;
+ if (imlInstruction->IsSuffixInstruction())
+ break;
+ // check if FPR is stored
+ if ((imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE) ||
+ (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE))
+ {
+ if (imlInstruction->op_storeLoad.registerData.GetRegID() == fprIndex)
+ {
+ if (foundMatch == false)
+ {
+ // flag the load-single instruction as "don't expand" (leave single value as-is)
+ imlInstructionLoad->op_storeLoad.flags2.notExpanded = true;
+ }
+ // also set the flag for the store instruction
+ IMLInstruction* imlInstructionStore = imlInstruction;
+ imlInstructionStore->op_storeLoad.flags2.notExpanded = true;
+
+ foundMatch = true;
+ lastStore = i + 1;
+
+ continue;
+ }
+ }
+ // if the FPR is copied then keep track of it. We can expand the copies instead of the original
+ if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R && imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN && imlInstruction->op_fpr_r_r.regA.GetRegID() == fprIndex)
+ {
+ if (imlInstruction->op_fpr_r_r.regR.GetRegID() == fprIndex)
+ {
+ // unexpected no-op
+ break;
+ }
+ if (trackedMoves.size() >= trackedMoves.capacity())
+ {
+ // we can't track any more moves, expand here
+ lastStore = i;
+ break;
+ }
+ trackedMoves.push_back(i);
+ continue;
+ }
+ // check if FPR is overwritten
+ imlInstruction->CheckRegisterUsage(®istersUsed);
+ if (registersUsed.writtenGPR1.IsValidAndSameRegID(fprIndex) || registersUsed.writtenGPR2.IsValidAndSameRegID(fprIndex))
+ break;
+ if (registersUsed.readGPR1.IsValidAndSameRegID(fprIndex))
+ break;
+ if (registersUsed.readGPR2.IsValidAndSameRegID(fprIndex))
+ break;
+ if (registersUsed.readGPR3.IsValidAndSameRegID(fprIndex))
+ break;
+ if (registersUsed.readGPR4.IsValidAndSameRegID(fprIndex))
+ break;
+ }
+
+ if (foundMatch)
+ {
+ // insert expand instructions for each target register of a move
+ sint32 positionBias = 0;
+ for (auto& trackedMove : trackedMoves)
+ {
+ sint32 realPosition = trackedMove + positionBias;
+ IMLInstruction* imlMoveInstruction = imlSegment->imlList.data() + realPosition;
+ if (realPosition >= lastStore)
+ break; // expand is inserted before this move
+ else
+ lastStore++;
+
+ cemu_assert_debug(imlMoveInstruction->type == PPCREC_IML_TYPE_FPR_R_R && imlMoveInstruction->op_fpr_r_r.regA.GetRegID() == fprIndex);
+ cemu_assert_debug(imlMoveInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::F64);
+ auto dstReg = imlMoveInstruction->op_fpr_r_r.regR;
+ IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, realPosition+1); // one after the move
+ newExpand->make_fpr_r(PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, dstReg);
+ positionBias++;
+ }
+ // insert expand instruction after store
+ IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, lastStore);
+ newExpand->make_fpr_r(PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, _FPRRegFromID(fprIndex));
+ }
+}
+
+/*
+* Scans for the pattern:
+*  load of a single-precision float into an FPR
+*  ...unrelated instructions that leave the FPR untouched...
+*  store of that FPR as a single-precision float
+*
+* For these patterns the store and load are modified to work with un-extended values (float remains as float, no double conversion)
+* The float->double extension is then executed later
+* Advantages:
+* Keeps denormals and other special float values intact
+* Slightly improves performance
+*/
+void IMLOptimizer_OptimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext)
+{
+ for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
+ {
+ for (sint32 i = 0; i < segIt->imlList.size(); i++)
+ {
+ IMLInstruction* imlInstruction = segIt->imlList.data() + i;
+ if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE)
+ {
+ PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
+ }
+ else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE)
+ {
+ PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
+ }
+ }
+ }
+}
+
+void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg gprReg)
+{
+ cemu_assert_debug(gprReg.GetBaseFormat() == IMLRegFormat::I64); // todo - proper handling required for non-standard sizes
+ cemu_assert_debug(gprReg.GetRegFormat() == IMLRegFormat::I32);
+
+ IMLRegID gprIndex = gprReg.GetRegID();
+ IMLInstruction* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad;
+ if ( imlInstructionLoad->op_storeLoad.flags2.swapEndian == false )
+ return;
+ bool foundMatch = false;
+ IMLUsedRegisters registersUsed;
+ sint32 scanRangeEnd = std::min<sint32>(imlIndexLoad + 25, (sint32)imlSegment->imlList.size()); // don't scan too far (scanning costs time and the chance of merging the load+store drops with distance)
+ sint32 i = imlIndexLoad + 1;
+ for (; i < scanRangeEnd; i++)
+ {
+ IMLInstruction* imlInstruction = imlSegment->imlList.data() + i;
+ if (imlInstruction->IsSuffixInstruction())
+ break;
+ // check if GPR is stored
+ if ((imlInstruction->type == PPCREC_IML_TYPE_STORE && imlInstruction->op_storeLoad.copyWidth == 32 ) )
+ {
+ if (imlInstruction->op_storeLoad.registerMem.GetRegID() == gprIndex)
+ break;
+ if (imlInstruction->op_storeLoad.registerData.GetRegID() == gprIndex)
+ {
+ IMLInstruction* imlInstructionStore = imlInstruction;
+ if (foundMatch == false)
+ {
+ // switch the endian swap flag for the load instruction
+ imlInstructionLoad->op_storeLoad.flags2.swapEndian = !imlInstructionLoad->op_storeLoad.flags2.swapEndian;
+ foundMatch = true;
+ }
+ // switch the endian swap flag for the store instruction
+ imlInstructionStore->op_storeLoad.flags2.swapEndian = !imlInstructionStore->op_storeLoad.flags2.swapEndian;
+ // keep scanning
+ continue;
+ }
+ }
+ // check if GPR is accessed
+ imlInstruction->CheckRegisterUsage(®istersUsed);
+ if (registersUsed.readGPR1.IsValidAndSameRegID(gprIndex) ||
+ registersUsed.readGPR2.IsValidAndSameRegID(gprIndex) ||
+ registersUsed.readGPR3.IsValidAndSameRegID(gprIndex))
+ {
+ break;
+ }
+ if (registersUsed.IsBaseGPRWritten(gprReg))
+ return; // GPR overwritten, we don't need to byte swap anymore
+ }
+ if (foundMatch)
+ {
+ PPCRecompiler_insertInstruction(imlSegment, i)->make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, gprReg, gprReg);
+ }
+}
+
+/*
+* Scans for the pattern:
+*  32bit load with endian swap into a GPR
+*  ...unrelated instructions that leave the GPR untouched...
+*  32bit store with endian swap from that GPR
+*
+* For these patterns the store and load are modified to work with non-swapped values
+* The big_endian->little_endian conversion is then executed later
+* Advantages:
+* Slightly improves performance
+*/
+void IMLOptimizer_OptimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext)
+{
+ for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
+ {
+ for (sint32 i = 0; i < segIt->imlList.size(); i++)
+ {
+ IMLInstruction* imlInstruction = segIt->imlList.data() + i;
+ if (imlInstruction->type == PPCREC_IML_TYPE_LOAD && imlInstruction->op_storeLoad.copyWidth == 32 && imlInstruction->op_storeLoad.flags2.swapEndian )
+ {
+ PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
+ }
+ }
+ }
+}
+
+IMLName PPCRecompilerImlGen_GetRegName(ppcImlGenContext_t* ppcImlGenContext, IMLReg reg);
+
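+// resolve a GQR register operand to its UGQR index (0-7) via its IML name, or -1 if the register does not map to a UGQR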
+sint32 _getGQRIndexFromRegister(ppcImlGenContext_t* ppcImlGenContext, IMLReg gqrReg)
+{
+ if (gqrReg.IsInvalid())
+ return -1;
+ sint32 namedReg = PPCRecompilerImlGen_GetRegName(ppcImlGenContext, gqrReg);
+ if (namedReg >= (PPCREC_NAME_SPR0 + SPR_UGQR0) && namedReg <= (PPCREC_NAME_SPR0 + SPR_UGQR7))
+ {
+ return namedReg - (PPCREC_NAME_SPR0 + SPR_UGQR0);
+ }
+ else
+ {
+ cemu_assert_suspicious();
+ }
+ return -1;
+}
+
+bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, uint32& gqrValue)
+{
+ // the default configuration is:
+ // UGQR0 = 0x00000000
+ // UGQR2 = 0x00040004
+ // UGQR3 = 0x00050005
+ // UGQR4 = 0x00060006
+ // UGQR5 = 0x00070007
+ // but games appear to be free to modify UGQR2 through UGQR7
+ // no game is known to modify UGQR0, so it's safe enough to optimize for the default value
+ // Ideally we would do some kind of runtime tracking and a second recompilation pass to create fast paths for PSQ_L/PSQ_ST, but that's still a todo
+ if (gqrIndex == 0)
+ gqrValue = 0x00000000;
+ else
+ return false;
+ return true;
+}
+
+// analyses register dependencies across the entire function
+// per segment this will generate information about which registers need to be preserved and which ones don't (e.g. are overwritten)
+class IMLOptimizerRegIOAnalysis
+{
+ public:
+ // constructor with segment pointer list as span
+ IMLOptimizerRegIOAnalysis(std::span<IMLSegment*> segmentList, uint32 maxRegId) : m_segmentList(segmentList), m_maxRegId(maxRegId)
+ {
+ m_segRegisterInOutList.resize(segmentList.size());
+ }
+
+ struct IMLSegmentRegisterInOut
+ {
+ // todo - since our register ID range is usually pretty small (<64) we could use integer bitmasks to accelerate this? There is a helper class used in RA code already
+ std::unordered_set<IMLRegID> regWritten; // registers which are modified in this segment
+ std::unordered_set<IMLRegID> regImported; // registers which are read in this segment before they are written (importing value from previous segments)
+ std::unordered_set<IMLRegID> regForward; // registers which are not read or written in this segment, but are imported into a later segment (propagated info)
+ };
+
+ // calculate which registers are imported (read-before-written) and forwarded (read-before-written by a later segment) per segment
+ // then in a second step propagate the dependencies across linked segments
+ void ComputeDepedencies()
+ {
+ std::vector<IMLSegmentRegisterInOut>& segRegisterInOutList = m_segRegisterInOutList;
+ IMLSegmentRegisterInOut* segIO = segRegisterInOutList.data();
+ uint32 index = 0;
+ for(auto& seg : m_segmentList)
+ {
+ seg->momentaryIndex = index;
+ index++;
+ for(auto& instr : seg->imlList)
+ {
+ IMLUsedRegisters registerUsage;
+ instr.CheckRegisterUsage(®isterUsage);
+ // registers are considered imported if they are read before being written in this seg
+ registerUsage.ForEachReadGPR([&](IMLReg gprReg) {
+ IMLRegID gprId = gprReg.GetRegID();
+ if (!segIO->regWritten.contains(gprId))
+ {
+ segIO->regImported.insert(gprId);
+ }
+ });
+ registerUsage.ForEachWrittenGPR([&](IMLReg gprReg) {
+ IMLRegID gprId = gprReg.GetRegID();
+ segIO->regWritten.insert(gprId);
+ });
+ }
+ segIO++;
+ }
+ // for every exit segment, import all registers
+ for(auto& seg : m_segmentList)
+ {
+ if (!seg->nextSegmentIsUncertain)
+ continue;
+ if(seg->deadCodeEliminationHintSeg)
+ continue;
+ IMLSegmentRegisterInOut& segIO = segRegisterInOutList[seg->momentaryIndex];
+ for(uint32 i=0; i<=m_maxRegId; i++)
+ {
+ segIO.regImported.insert((IMLRegID)i);
+ }
+ }
+ // broadcast dependencies across segment chains
+ std::unordered_set<uint32> segIdsWhichNeedUpdate;
+ for (uint32 i = 0; i < m_segmentList.size(); i++)
+ {
+ segIdsWhichNeedUpdate.insert(i);
+ }
+ while(!segIdsWhichNeedUpdate.empty())
+ {
+ auto firstIt = segIdsWhichNeedUpdate.begin();
+ uint32 segId = *firstIt;
+ segIdsWhichNeedUpdate.erase(firstIt);
+ // forward regImported and regForward to earlier segments into their regForward, unless the register is written
+ auto& curSeg = m_segmentList[segId];
+ IMLSegmentRegisterInOut& curSegIO = segRegisterInOutList[segId];
+ for(auto& prevSeg : curSeg->list_prevSegments)
+ {
+ IMLSegmentRegisterInOut& prevSegIO = segRegisterInOutList[prevSeg->momentaryIndex];
+ bool prevSegChanged = false;
+ for(auto& regId : curSegIO.regImported)
+ {
+ if (!prevSegIO.regWritten.contains(regId))
+ prevSegChanged |= prevSegIO.regForward.insert(regId).second;
+ }
+ for(auto& regId : curSegIO.regForward)
+ {
+ if (!prevSegIO.regWritten.contains(regId))
+ prevSegChanged |= prevSegIO.regForward.insert(regId).second;
+ }
+ if(prevSegChanged)
+ segIdsWhichNeedUpdate.insert(prevSeg->momentaryIndex);
+ }
+ // same for hint links
+ for(auto& prevSeg : curSeg->list_deadCodeHintBy)
+ {
+ IMLSegmentRegisterInOut& prevSegIO = segRegisterInOutList[prevSeg->momentaryIndex];
+ bool prevSegChanged = false;
+ for(auto& regId : curSegIO.regImported)
+ {
+ if (!prevSegIO.regWritten.contains(regId))
+ prevSegChanged |= prevSegIO.regForward.insert(regId).second;
+ }
+ for(auto& regId : curSegIO.regForward)
+ {
+ if (!prevSegIO.regWritten.contains(regId))
+ prevSegChanged |= prevSegIO.regForward.insert(regId).second;
+ }
+ if(prevSegChanged)
+ segIdsWhichNeedUpdate.insert(prevSeg->momentaryIndex);
+ }
+ }
+ }
+
+ std::unordered_set<IMLRegID> GetRegistersNeededAtEndOfSegment(IMLSegment& seg)
+ {
+ std::unordered_set<IMLRegID> regsNeeded;
+ if(seg.nextSegmentIsUncertain)
+ {
+ if(seg.deadCodeEliminationHintSeg)
+ {
+ auto& nextSegIO = m_segRegisterInOutList[seg.deadCodeEliminationHintSeg->momentaryIndex];
+ regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end());
+ regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end());
+ }
+ else
+ {
+ // add all regs
+ for(uint32 i = 0; i <= m_maxRegId; i++)
+ regsNeeded.insert(i);
+ }
+ return regsNeeded;
+ }
+ if(seg.nextSegmentBranchTaken)
+ {
+ auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchTaken->momentaryIndex];
+ regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end());
+ regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end());
+ }
+ if(seg.nextSegmentBranchNotTaken)
+ {
+ auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchNotTaken->momentaryIndex];
+ regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end());
+ regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end());
+ }
+ return regsNeeded;
+ }
+
+ bool IsRegisterNeededAtEndOfSegment(IMLSegment& seg, IMLRegID regId)
+ {
+ if(seg.nextSegmentIsUncertain)
+ {
+ if(!seg.deadCodeEliminationHintSeg)
+ return true;
+ auto& nextSegIO = m_segRegisterInOutList[seg.deadCodeEliminationHintSeg->momentaryIndex];
+ if(nextSegIO.regImported.contains(regId))
+ return true;
+ if(nextSegIO.regForward.contains(regId))
+ return true;
+ return false;
+ }
+ if(seg.nextSegmentBranchTaken)
+ {
+ auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchTaken->momentaryIndex];
+ if(nextSegIO.regImported.contains(regId))
+ return true;
+ if(nextSegIO.regForward.contains(regId))
+ return true;
+ }
+ if(seg.nextSegmentBranchNotTaken)
+ {
+ auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchNotTaken->momentaryIndex];
+ if(nextSegIO.regImported.contains(regId))
+ return true;
+ if(nextSegIO.regForward.contains(regId))
+ return true;
+ }
+ return false;
+ }
+
+ private:
+ std::span<IMLSegment*> m_segmentList;
+ uint32 m_maxRegId;
+
+ std::vector<IMLSegmentRegisterInOut> m_segRegisterInOutList;
+
+};
+
+// scan backwards starting from index and return the index of the first found instruction which writes to the given register (by id)
+sint32 IMLUtil_FindInstructionWhichWritesRegister(IMLSegment& seg, sint32 startIndex, IMLReg reg, sint32 maxScanDistance = -1)
+{
+ sint32 endIndex = maxScanDistance < 0 ? 0 : std::max(startIndex - maxScanDistance, 0); // a negative maxScanDistance means no scan limit
+ for (sint32 i = startIndex; i >= endIndex; i--)
+ {
+ IMLInstruction& imlInstruction = seg.imlList[i];
+ IMLUsedRegisters registersUsed;
+ imlInstruction.CheckRegisterUsage(®istersUsed);
+ if (registersUsed.IsBaseGPRWritten(reg))
+ return i;
+ }
+ return -1;
+}
+
+// returns true if the instruction can safely be moved while keeping ordering constraints and data dependencies intact
+// initialIndex is inclusive, targetIndex is exclusive
+bool IMLUtil_CanMoveInstructionTo(IMLSegment& seg, sint32 initialIndex, sint32 targetIndex)
+{
+ boost::container::static_vector<IMLRegID, 4> regsWritten; // an instruction accesses at most 4 read and 2 written GPRs
+ boost::container::static_vector<IMLRegID, 4> regsRead;
+ // get list of read and written registers
+ IMLUsedRegisters registersUsed;
+ seg.imlList[initialIndex].CheckRegisterUsage(®istersUsed);
+ registersUsed.ForEachAccessedGPR([&](IMLReg reg, bool isWritten) {
+ if (isWritten)
+ regsWritten.push_back(reg.GetRegID());
+ else
+ regsRead.push_back(reg.GetRegID());
+ });
+ // check all the instructions inbetween
+ if(initialIndex < targetIndex)
+ {
+ sint32 scanStartIndex = initialIndex+1; // +1 to skip the moving instruction itself
+ sint32 scanEndIndex = targetIndex;
+ for (sint32 i = scanStartIndex; i < scanEndIndex; i++)
+ {
+ IMLUsedRegisters registersUsed;
+ seg.imlList[i].CheckRegisterUsage(®istersUsed);
+ // in order to move an instruction past another instruction, none of its read registers may be written by that instruction
+ // and none of its written registers may be read by it
+ bool canMove = true;
+ registersUsed.ForEachAccessedGPR([&](IMLReg reg, bool isWritten) {
+ IMLRegID regId = reg.GetRegID();
+ if (!isWritten)
+ canMove = canMove && std::find(regsWritten.begin(), regsWritten.end(), regId) == regsWritten.end();
+ else
+ canMove = canMove && std::find(regsRead.begin(), regsRead.end(), regId) == regsRead.end();
+ });
+ if(!canMove)
+ return false;
+ }
+ }
+ else
+ {
+ cemu_assert_unimplemented(); // backwards scan is todo
+ return false;
+ }
+ return true;
+}
+
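+// count how many times the register with the given id is read within the instruction range [scanStartIndex, scanEndIndex]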
+sint32 IMLUtil_CountRegisterReadsInRange(IMLSegment& seg, sint32 scanStartIndex, sint32 scanEndIndex, IMLRegID regId)
+{
+ cemu_assert_debug(scanStartIndex <= scanEndIndex);
+ cemu_assert_debug(scanEndIndex < seg.imlList.size());
+ sint32 count = 0;
+ for (sint32 i = scanStartIndex; i <= scanEndIndex; i++)
+ {
+ IMLUsedRegisters registersUsed;
+ seg.imlList[i].CheckRegisterUsage(®istersUsed);
+ registersUsed.ForEachReadGPR([&](IMLReg reg) {
+ if (reg.GetRegID() == regId)
+ count++;
+ });
+ }
+ return count;
+}
+
+// move instruction from one index to another
+// instruction will be inserted before the instruction at targetIndex
+// returns the new instruction index of the moved instruction
+sint32 IMLUtil_MoveInstructionTo(IMLSegment& seg, sint32 initialIndex, sint32 targetIndex)
+{
+ cemu_assert_debug(initialIndex != targetIndex);
+ IMLInstruction temp = seg.imlList[initialIndex];
+ if (initialIndex < targetIndex)
+ {
+ cemu_assert_debug(targetIndex > 0);
+ targetIndex--;
+ for(size_t i=initialIndex; i<(size_t)targetIndex; i++)
+ seg.imlList[i] = seg.imlList[i + 1];
+ seg.imlList[targetIndex] = temp;
+ return targetIndex;
+ }
+ else
+ {
+ cemu_assert_unimplemented(); // moving an instruction backwards is not needed by the current passes
+ return -1;
+ }
+}
+
+// replaces instructions whose written registers are never read again (and which have no other side effects) with no-ops
+void IMLOptimizer_RemoveDeadCodeFromSegment(IMLOptimizerRegIOAnalysis& regIoAnalysis, IMLSegment& seg)
+{
+ std::unordered_set<IMLRegID> regsNeeded = regIoAnalysis.GetRegistersNeededAtEndOfSegment(seg);
+
+ // start with suffix instruction
+ if(seg.HasSuffixInstruction())
+ {
+ IMLInstruction& imlInstruction = seg.imlList[seg.GetSuffixInstructionIndex()];
+ IMLUsedRegisters registersUsed;
+ imlInstruction.CheckRegisterUsage(®istersUsed);
+ registersUsed.ForEachWrittenGPR([&](IMLReg reg) {
+ regsNeeded.erase(reg.GetRegID());
+ });
+ registersUsed.ForEachReadGPR([&](IMLReg reg) {
+ regsNeeded.insert(reg.GetRegID());
+ });
+ }
+ // iterate instructions backwards
+ for (sint32 i = seg.imlList.size() - (seg.HasSuffixInstruction() ? 2:1); i >= 0; i--)
+ {
+ IMLInstruction& imlInstruction = seg.imlList[i];
+ IMLUsedRegisters registersUsed;
+ imlInstruction.CheckRegisterUsage(®istersUsed);
+ // walking backwards: a register write means the value is not needed before this point,
+ // a register read means the value must still be available before this point
+
+ // check if this instruction only writes registers which will never be read
+ bool onlyWritesRedundantRegisters = true;
+ registersUsed.ForEachWrittenGPR([&](IMLReg reg) {
+ if (regsNeeded.contains(reg.GetRegID()))
+ onlyWritesRedundantRegisters = false;
+ });
+ // check if any of the written registers are read after this point
+ registersUsed.ForEachWrittenGPR([&](IMLReg reg) {
+ regsNeeded.erase(reg.GetRegID());
+ });
+ registersUsed.ForEachReadGPR([&](IMLReg reg) {
+ regsNeeded.insert(reg.GetRegID());
+ });
+ if(!imlInstruction.HasSideEffects() && onlyWritesRedundantRegisters)
+ {
+ imlInstruction.make_no_op();
+ }
+ }
+}
+
+void IMLOptimizerX86_SubstituteCJumpForEflagsJump(IMLOptimizerRegIOAnalysis& regIoAnalysis, IMLSegment& seg)
+{
+ // convert and optimize bool condition jumps to eflags condition jumps
+ // - Moves eflag setter (e.g. cmp) closer to eflags consumer (conditional jump) if necessary. If not possible but required then exit early
+ // - Since we only rely on eflags, the boolean register can be optimized out if DCE considers it unused
+ // - Further detect and optimize patterns like DEC + CMP + JCC into fused ops (todo)
+
+ // check if this segment ends with a conditional jump
+ if(!seg.HasSuffixInstruction())
+ return;
+ sint32 cjmpInstIndex = seg.GetSuffixInstructionIndex();
+ if(cjmpInstIndex < 0)
+ return;
+ IMLInstruction& cjumpInstr = seg.imlList[cjmpInstIndex];
+ if( cjumpInstr.type != PPCREC_IML_TYPE_CONDITIONAL_JUMP )
+ return;
+ IMLReg regCondBool = cjumpInstr.op_conditional_jump.registerBool;
+ bool invertedCondition = !cjumpInstr.op_conditional_jump.mustBeTrue;
+ // find the instruction which sets the bool
+ sint32 cmpInstrIndex = IMLUtil_FindInstructionWhichWritesRegister(seg, cjmpInstIndex-1, regCondBool, 20);
+ if(cmpInstrIndex < 0)
+ return;
+ // check if its an instruction combo which can be optimized (currently only cmp + cjump) and get the condition
+ IMLInstruction& condSetterInstr = seg.imlList[cmpInstrIndex];
+ IMLCondition cond;
+ if(condSetterInstr.type == PPCREC_IML_TYPE_COMPARE)
+ cond = condSetterInstr.op_compare.cond;
+ else if(condSetterInstr.type == PPCREC_IML_TYPE_COMPARE_S32)
+ cond = condSetterInstr.op_compare_s32.cond;
+ else
+ return;
+ // check if instructions inbetween modify eflags
+ sint32 indexEflagsSafeStart = -1; // index of the first instruction which does not modify eflags up to cjump
+ for(sint32 i = cjmpInstIndex-1; i > cmpInstrIndex; i--)
+ {
+ if(IMLOptimizerX86_ModifiesEFlags(seg.imlList[i]))
+ {
+ indexEflagsSafeStart = i+1;
+ break;
+ }
+ }
+ if(indexEflagsSafeStart >= 0)
+ {
+ cemu_assert(indexEflagsSafeStart > 0);
+ // there are eflags-modifying instructions inbetween the bool setter and cjump
+ // try to move the eflags setter close enough to the cjump (to indexEflagsSafeStart)
+ bool canMove = IMLUtil_CanMoveInstructionTo(seg, cmpInstrIndex, indexEflagsSafeStart);
+ if(!canMove)
+ {
+ return;
+ }
+ else
+ {
+ cmpInstrIndex = IMLUtil_MoveInstructionTo(seg, cmpInstrIndex, indexEflagsSafeStart);
+ }
+ }
+ // we can turn the jump into an eflags jump
+ cjumpInstr.make_x86_eflags_jcc(cond, invertedCondition);
+
+ if (IMLUtil_CountRegisterReadsInRange(seg, cmpInstrIndex, cjmpInstIndex, regCondBool.GetRegID()) > 1 || regIoAnalysis.IsRegisterNeededAtEndOfSegment(seg, regCondBool.GetRegID()))
+ return; // bool register is used beyond the CMP, we can't drop it
+
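+ // the boolean result is no longer needed, so rewrite the compare into a raw x86 CMP that only sets eflags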
+ auto& cmpInstr = seg.imlList[cmpInstrIndex];
+ cemu_assert_debug(cmpInstr.type == PPCREC_IML_TYPE_COMPARE || cmpInstr.type == PPCREC_IML_TYPE_COMPARE_S32);
+ if(cmpInstr.type == PPCREC_IML_TYPE_COMPARE)
+ {
+ IMLReg regA = cmpInstr.op_compare.regA;
+ IMLReg regB = cmpInstr.op_compare.regB;
+ seg.imlList[cmpInstrIndex].make_r_r(PPCREC_IML_OP_X86_CMP, regA, regB);
+ }
+ else
+ {
+ IMLReg regA = cmpInstr.op_compare_s32.regA;
+ sint32 val = cmpInstr.op_compare_s32.immS32;
+ seg.imlList[cmpInstrIndex].make_r_s32(PPCREC_IML_OP_X86_CMP, regA, val);
+ }
+
+}
+
+void IMLOptimizer_StandardOptimizationPassForSegment(IMLOptimizerRegIOAnalysis& regIoAnalysis, IMLSegment& seg)
+{
+ IMLOptimizer_RemoveDeadCodeFromSegment(regIoAnalysis, seg);
+
+#ifdef ARCH_X86_64
+ // x86 specific optimizations
+ IMLOptimizerX86_SubstituteCJumpForEflagsJump(regIoAnalysis, seg); // this pass should be applied late since it creates invisible eflags dependencies (which would break further register dependency analysis)
+#endif
+}
+
+void IMLOptimizer_StandardOptimizationPass(ppcImlGenContext_t& ppcImlGenContext)
+{
+ IMLOptimizerRegIOAnalysis regIoAnalysis(ppcImlGenContext.segmentList2, ppcImlGenContext.GetMaxRegId());
+ regIoAnalysis.ComputeDepedencies();
+ for (IMLSegment* segIt : ppcImlGenContext.segmentList2)
+ {
+ IMLOptimizer_StandardOptimizationPassForSegment(regIoAnalysis, *segIt);
+ }
+}
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp
new file mode 100644
index 00000000..935e61ac
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp
@@ -0,0 +1,2204 @@
+#include "IML.h"
+
+#include "../PPCRecompiler.h"
+#include "../PPCRecompilerIml.h"
+#include "IMLRegisterAllocator.h"
+#include "IMLRegisterAllocatorRanges.h"
+
+#include "../BackendX64/BackendX64.h"
+#ifdef __aarch64__
+#include "../BackendAArch64/BackendAArch64.h"
+#endif
+
+#include <boost/container/small_vector.hpp>
+#include <boost/container/static_vector.hpp>
+
+#include "Common/cpu_features.h"
+
+#define DEBUG_RA_EXTRA_VALIDATION 0 // if set to non-zero, additional expensive validation checks will be performed
+#define DEBUG_RA_INSTRUCTION_GEN 0
+
+struct IMLRARegAbstractLiveness // preliminary liveness info. One entry per register and segment
+{
+ IMLRARegAbstractLiveness(IMLRegFormat regBaseFormat, sint32 usageStart, sint32 usageEnd)
+ : regBaseFormat(regBaseFormat), usageStart(usageStart), usageEnd(usageEnd) {};
+
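+ // extend the tracked usage window so it covers the instruction at the given index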
+ void TrackInstruction(sint32 index)
+ {
+ usageStart = std::min(usageStart, index);
+ usageEnd = std::max(usageEnd, index + 1); // exclusive index
+ }
+
+ sint32 usageStart;
+ sint32 usageEnd;
+ bool isProcessed{false};
+ IMLRegFormat regBaseFormat;
+};
+
+struct IMLRegisterAllocatorContext
+{
+ IMLRegisterAllocatorParameters* raParam;
+ ppcImlGenContext_t* deprGenContext; // deprecated. Try to decouple IMLRA from other parts of IML/PPCRec
+
+ std::unordered_map<IMLRegID, IMLRegFormat> regIdToBaseFormat;
+ // first pass
+ std::vector<std::unordered_map<IMLRegID, IMLRARegAbstractLiveness>> perSegmentAbstractRanges;
+
+ // helper methods
+ inline std::unordered_map<IMLRegID, IMLRARegAbstractLiveness>& GetSegmentAbstractRangeMap(IMLSegment* imlSegment)
+ {
+ return perSegmentAbstractRanges[imlSegment->momentaryIndex];
+ }
+
+ inline IMLRegFormat GetBaseFormatByRegId(IMLRegID regId) const
+ {
+ auto it = regIdToBaseFormat.find(regId);
+ cemu_assert_debug(it != regIdToBaseFormat.cend());
+ return it->second;
+ }
+};
+
+struct IMLFixedRegisters
+{
+ struct Entry
+ {
+ Entry(IMLReg reg, IMLPhysRegisterSet physRegSet)
+ : reg(reg), physRegSet(physRegSet) {}
+
+ IMLReg reg;
+ IMLPhysRegisterSet physRegSet;
+ };
+ boost::container::small_vector<Entry, 4> listInput; // fixed register requirements for instruction input edge
+ boost::container::small_vector<Entry, 4> listOutput; // fixed register requirements for instruction output edge
+};
+
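+// map the parameters and return value of a CALL_IMM instruction to fixed physical registers and reserve all volatile registers across the call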
+static void SetupCallingConvention(const IMLInstruction* instruction, IMLFixedRegisters& fixedRegs, const IMLPhysReg intParamToPhysReg[3], const IMLPhysReg floatParamToPhysReg[3], const IMLPhysReg intReturnPhysReg, const IMLPhysReg floatReturnPhysReg, IMLPhysRegisterSet volatileRegisters)
+{
+ sint32 numIntParams = 0, numFloatParams = 0;
+
+ auto AddParameterMapping = [&](IMLReg reg) {
+ if (!reg.IsValid())
+ return;
+ if (reg.GetBaseFormat() == IMLRegFormat::I64)
+ {
+ IMLPhysRegisterSet ps;
+ ps.SetAvailable(intParamToPhysReg[numIntParams]);
+ fixedRegs.listInput.emplace_back(reg, ps);
+ numIntParams++;
+ }
+ else if (reg.GetBaseFormat() == IMLRegFormat::F64)
+ {
+ IMLPhysRegisterSet ps;
+ ps.SetAvailable(floatParamToPhysReg[numFloatParams]);
+ fixedRegs.listInput.emplace_back(reg, ps);
+ numFloatParams++;
+ }
+ else
+ {
+ cemu_assert_suspicious();
+ }
+ };
+ AddParameterMapping(instruction->op_call_imm.regParam0);
+ AddParameterMapping(instruction->op_call_imm.regParam1);
+ AddParameterMapping(instruction->op_call_imm.regParam2);
+ // return value
+ if (instruction->op_call_imm.regReturn.IsValid())
+ {
+ IMLRegFormat returnFormat = instruction->op_call_imm.regReturn.GetBaseFormat();
+ bool isIntegerFormat = returnFormat == IMLRegFormat::I64 || returnFormat == IMLRegFormat::I32 || returnFormat == IMLRegFormat::I16 || returnFormat == IMLRegFormat::I8;
+ IMLPhysRegisterSet ps;
+ if (isIntegerFormat)
+ {
+ ps.SetAvailable(intReturnPhysReg);
+ volatileRegisters.SetReserved(intReturnPhysReg);
+ }
+ else
+ {
+ ps.SetAvailable(floatReturnPhysReg);
+ volatileRegisters.SetReserved(floatReturnPhysReg);
+ }
+ fixedRegs.listOutput.emplace_back(instruction->op_call_imm.regReturn, ps);
+ }
+ // block volatile registers from being used on the output edge, this makes the register allocator store them during the call
+ fixedRegs.listOutput.emplace_back(IMLREG_INVALID, volatileRegisters);
+}
+
+#if defined(__aarch64__)
+// aarch64
+static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs)
+{
+ fixedRegs.listInput.clear();
+ fixedRegs.listOutput.clear();
+
+ // The purpose of GetInstructionFixedRegisters() is to constrain virtual registers to specific physical registers for instructions which need it
+ // on x86 this is used for instructions like SHL reg, CL where the CL register is hardwired. On aarch64 it's probably only necessary for setting up the calling convention
+ if (instruction->type == PPCREC_IML_TYPE_CALL_IMM)
+ {
+ const IMLPhysReg intParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_GPR_BASE + 1, IMLArchAArch64::PHYSREG_GPR_BASE + 2};
+ const IMLPhysReg floatParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_FPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 1, IMLArchAArch64::PHYSREG_FPR_BASE + 2};
+ IMLPhysRegisterSet volatileRegs;
+ for (int i = 0; i <= 17; i++) // x0 to x17 are volatile
+ volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_GPR_BASE + i);
+ // v0-v7 & v16-v31 are volatile. For v8-v15 only the high 64 bits are volatile.
+ for (int i = 0; i <= 7; i++)
+ volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_FPR_BASE + i);
+ for (int i = 16; i <= 31; i++)
+ volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_FPR_BASE + i);
+ SetupCallingConvention(instruction, fixedRegs, intParamToPhysReg, floatParamToPhysReg, IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 0, volatileRegs);
+ }
+}
+#else
+// x86-64
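+// fixed register constraints on x86-64: the shift amount must be in ECX when BMI2 is unavailable, ATOMIC_CMP_STORE implicitly uses EAX, and CALL_IMM passes parameters in RCX/RDX/R8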
+static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs)
+{
+ fixedRegs.listInput.clear();
+ fixedRegs.listOutput.clear();
+
+ if (instruction->type == PPCREC_IML_TYPE_R_R_R)
+ {
+ if (instruction->operation == PPCREC_IML_OP_LEFT_SHIFT || instruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S || instruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
+ {
+ if(!g_CPUFeatures.x86.bmi2)
+ {
+ IMLPhysRegisterSet ps;
+ ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_ECX);
+ fixedRegs.listInput.emplace_back(instruction->op_r_r_r.regB, ps);
+ }
+ }
+ }
+ else if (instruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
+ {
+ IMLPhysRegisterSet ps;
+ ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_EAX);
+ fixedRegs.listInput.emplace_back(IMLREG_INVALID, ps); // none of the inputs may use EAX
+ fixedRegs.listOutput.emplace_back(instruction->op_atomic_compare_store.regBoolOut, ps); // but we output to EAX
+ }
+ else if (instruction->type == PPCREC_IML_TYPE_CALL_IMM)
+ {
+ const IMLPhysReg intParamToPhysReg[3] = {IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8};
+ const IMLPhysReg floatParamToPhysReg[3] = {IMLArchX86::PHYSREG_FPR_BASE + 0, IMLArchX86::PHYSREG_FPR_BASE + 1, IMLArchX86::PHYSREG_FPR_BASE + 2};
+ IMLPhysRegisterSet volatileRegs;
+ volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RAX);
+ volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX);
+ volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX);
+ volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8);
+ volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R9);
+ volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R10);
+ volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R11);
+ // YMM0-YMM5 are volatile
+ for (int i = 0; i <= 5; i++)
+ volatileRegs.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + i);
+ // for YMM6-YMM15 only the upper 128 bits are volatile, which we don't use
+ SetupCallingConvention(instruction, fixedRegs, intParamToPhysReg, floatParamToPhysReg, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_EAX, IMLArchX86::PHYSREG_FPR_BASE + 0, volatileRegs);
+ }
+}
+#endif
+
+uint32 IMLRA_GetNextIterationIndex()
+{
+ static uint32 recRACurrentIterationIndex = 0;
+ recRACurrentIterationIndex++;
+ return recRACurrentIterationIndex;
+}
+
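+// recursively follow forward edges (up to a fixed depth) and mark segments that are part of the loop headed by imlSegmentLoopBase, increasing their loop depth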
+bool _detectLoop(IMLSegment* currentSegment, sint32 depth, uint32 iterationIndex, IMLSegment* imlSegmentLoopBase)
+{
+ if (currentSegment == imlSegmentLoopBase)
+ return true;
+ if (currentSegment->raInfo.lastIterationIndex == iterationIndex)
+ return currentSegment->raInfo.isPartOfProcessedLoop;
+ if (depth >= 9)
+ return false;
+ currentSegment->raInfo.lastIterationIndex = iterationIndex;
+ currentSegment->raInfo.isPartOfProcessedLoop = false;
+
+ if (currentSegment->nextSegmentIsUncertain)
+ return false;
+ if (currentSegment->nextSegmentBranchNotTaken)
+ {
+ if (currentSegment->nextSegmentBranchNotTaken->momentaryIndex > currentSegment->momentaryIndex)
+ {
+ currentSegment->raInfo.isPartOfProcessedLoop |= _detectLoop(currentSegment->nextSegmentBranchNotTaken, depth + 1, iterationIndex, imlSegmentLoopBase);
+ }
+ }
+ if (currentSegment->nextSegmentBranchTaken)
+ {
+ if (currentSegment->nextSegmentBranchTaken->momentaryIndex > currentSegment->momentaryIndex)
+ {
+ currentSegment->raInfo.isPartOfProcessedLoop |= _detectLoop(currentSegment->nextSegmentBranchTaken, depth + 1, iterationIndex, imlSegmentLoopBase);
+ }
+ }
+ if (currentSegment->raInfo.isPartOfProcessedLoop)
+ currentSegment->loopDepth++;
+ return currentSegment->raInfo.isPartOfProcessedLoop;
+}
+
+void IMLRA_DetectLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegmentLoopBase)
+{
+ uint32 iterationIndex = IMLRA_GetNextIterationIndex();
+ imlSegmentLoopBase->raInfo.lastIterationIndex = iterationIndex;
+ if (_detectLoop(imlSegmentLoopBase->nextSegmentBranchTaken, 0, iterationIndex, imlSegmentLoopBase))
+ {
+ imlSegmentLoopBase->loopDepth++;
+ }
+}
+
+void IMLRA_IdentifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
+{
+ if (imlSegment->nextSegmentIsUncertain)
+ return;
+ // check if this segment has a branch that links to itself (tight loop)
+ if (imlSegment->nextSegmentBranchTaken == imlSegment)
+ {
+ // segment loops over itself
+ imlSegment->loopDepth++;
+ return;
+ }
+ // check if this segment has a branch that goes backwards (potential complex loop)
+ if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->momentaryIndex < imlSegment->momentaryIndex)
+ {
+ IMLRA_DetectLoop(ppcImlGenContext, imlSegment);
+ }
+}
+
+#define SUBRANGE_LIST_SIZE (128)
+
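+// returns the distance (in instruction-edge units) from startPosition to the next access of the subrange, or a large constant if it is not accessed again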
+sint32 IMLRA_CountDistanceUntilNextUse(raLivenessRange* subrange, raInstructionEdge startPosition)
+{
+ for (sint32 i = 0; i < subrange->list_accessLocations.size(); i++)
+ {
+ if (subrange->list_accessLocations[i].pos >= startPosition)
+ {
+ auto& it = subrange->list_accessLocations[i];
+ cemu_assert_debug(it.IsRead() != it.IsWrite()); // an access location can be either read or write
+ cemu_assert_debug(!startPosition.ConnectsToPreviousSegment() && !startPosition.ConnectsToNextSegment());
+ return it.pos.GetRaw() - startPosition.GetRaw();
+ }
+ }
+ cemu_assert_debug(subrange->imlSegment->imlList.size() < 10000);
+ return 10001 * 2;
+}
+
+// returns -1 if there is no fixed register requirement on or after startPosition
+sint32 IMLRA_CountDistanceUntilFixedRegUsageInRange(IMLSegment* imlSegment, raLivenessRange* range, raInstructionEdge startPosition, sint32 physRegister, bool& hasFixedAccess)
+{
+ hasFixedAccess = false;
+ cemu_assert_debug(startPosition.IsInstructionIndex());
+ for (auto& fixedReqEntry : range->list_fixedRegRequirements)
+ {
+ if (fixedReqEntry.pos < startPosition)
+ continue;
+ if (fixedReqEntry.allowedReg.IsAvailable(physRegister))
+ {
+ hasFixedAccess = true;
+ return fixedReqEntry.pos.GetRaw() - startPosition.GetRaw();
+ }
+ }
+ cemu_assert_debug(range->interval.end.IsInstructionIndex());
+ return range->interval.end.GetRaw() - startPosition.GetRaw();
+}
+
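+// scan forward from startPosition (at most maxDistance edges) and return the distance to the first fixed-register requirement that reserves physRegister for a different (or invalid) register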
+sint32 IMLRA_CountDistanceUntilFixedRegUsage(IMLSegment* imlSegment, raInstructionEdge startPosition, sint32 maxDistance, IMLRegID ourRegId, sint32 physRegister)
+{
+ cemu_assert_debug(startPosition.IsInstructionIndex());
+ raInstructionEdge lastPos2;
+ lastPos2.Set(imlSegment->imlList.size(), false);
+
+ raInstructionEdge endPos;
+ endPos = startPosition + maxDistance;
+ if (endPos > lastPos2)
+ endPos = lastPos2;
+ IMLFixedRegisters fixedRegs;
+ if (startPosition.IsOnOutputEdge())
+ GetInstructionFixedRegisters(imlSegment->imlList.data() + startPosition.GetInstructionIndex(), fixedRegs);
+ for (raInstructionEdge currentPos = startPosition; currentPos <= endPos; ++currentPos)
+ {
+ if (currentPos.IsOnInputEdge())
+ {
+ GetInstructionFixedRegisters(imlSegment->imlList.data() + currentPos.GetInstructionIndex(), fixedRegs);
+ }
+ auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput;
+ for (auto& fixedRegLoc : fixedRegAccess)
+ {
+ if (fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId)
+ {
+ cemu_assert_debug(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.physRegSet.HasExactlyOneAvailable()); // this whole function only makes sense when there is only one fixed register, otherwise there are extra permutations to consider. Except for IMLREG_INVALID which is used to indicate reserved registers
+ if (fixedRegLoc.physRegSet.IsAvailable(physRegister))
+ return currentPos.GetRaw() - startPosition.GetRaw();
+ }
+ }
+ }
+ return endPos.GetRaw() - startPosition.GetRaw();
+}
+
+// count how many instructions there are until physRegister is used by any subrange or reserved for any fixed register requirement (returns 0 if register is in use at startIndex)
+sint32 PPCRecRA_countDistanceUntilNextLocalPhysRegisterUse(IMLSegment* imlSegment, raInstructionEdge startPosition, sint32 physRegister)
+{
+ cemu_assert_debug(startPosition.IsInstructionIndex());
+ sint32 minDistance = (sint32)imlSegment->imlList.size() * 2 - startPosition.GetRaw();
+ // next
+ raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
+ while (subrangeItr)
+ {
+ if (subrangeItr->GetPhysicalRegister() != physRegister)
+ {
+ subrangeItr = subrangeItr->link_allSegmentRanges.next;
+ continue;
+ }
+ if (subrangeItr->interval.ContainsEdge(startPosition))
+ return 0;
+ if (subrangeItr->interval.end < startPosition)
+ {
+ subrangeItr = subrangeItr->link_allSegmentRanges.next;
+ continue;
+ }
+ cemu_assert_debug(startPosition <= subrangeItr->interval.start);
+ sint32 currentDist = subrangeItr->interval.start.GetRaw() - startPosition.GetRaw();
+ minDistance = std::min(minDistance, currentDist);
+ subrangeItr = subrangeItr->link_allSegmentRanges.next;
+ }
+ return minDistance;
+}
+
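+// helper that tracks which liveness ranges are active while sweeping forward through a segment's instruction edges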
+struct IMLRALivenessTimeline
+{
+ IMLRALivenessTimeline()
+ {
+ }
+
+ // manually add an active range
+ void AddActiveRange(raLivenessRange* subrange)
+ {
+ activeRanges.emplace_back(subrange);
+ }
+
+ void ExpireRanges(raInstructionEdge expireUpTo)
+ {
+ expiredRanges.clear();
+ size_t count = activeRanges.size();
+ for (size_t f = 0; f < count; f++)
+ {
+ raLivenessRange* liverange = activeRanges[f];
+ if (liverange->interval.end < expireUpTo) // this was <= but since end is not inclusive we need to use <
+ {
+#ifdef CEMU_DEBUG_ASSERT
+ if (!expireUpTo.ConnectsToNextSegment() && (liverange->subrangeBranchTaken || liverange->subrangeBranchNotTaken))
+ assert_dbg(); // infinite subranges should not expire
+#endif
+ expiredRanges.emplace_back(liverange);
+ // remove entry
+ activeRanges[f] = activeRanges[count - 1];
+ f--;
+ count--;
+ }
+ }
+ if (count != activeRanges.size())
+ activeRanges.resize(count);
+ }
+
+ std::span<raLivenessRange*> GetExpiredRanges()
+ {
+ return {expiredRanges.data(), expiredRanges.size()};
+ }
+
+ std::span<raLivenessRange*> GetActiveRanges()
+ {
+ return {activeRanges.data(), activeRanges.size()};
+ }
+
+ raLivenessRange* GetActiveRangeByVirtualRegId(IMLRegID regId)
+ {
+ for (auto& it : activeRanges)
+ if (it->virtualRegister == regId)
+ return it;
+ return nullptr;
+ }
+
+ raLivenessRange* GetActiveRangeByPhysicalReg(sint32 physReg)
+ {
+ cemu_assert_debug(physReg >= 0);
+ for (auto& it : activeRanges)
+ if (it->physicalRegister == physReg)
+ return it;
+ return nullptr;
+ }
+
+ boost::container::small_vector<raLivenessRange*, 64> activeRanges;
+
+ private:
+ boost::container::small_vector<raLivenessRange*, 16> expiredRanges;
+};
+
+// mark occupied registers by any overlapping range as unavailable in physRegSet
+void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange* range2, IMLPhysRegisterSet& physRegSet)
+{
+ auto clusterRanges = range2->GetAllSubrangesInCluster();
+ for (auto& subrange : clusterRanges)
+ {
+ IMLSegment* imlSegment = subrange->imlSegment;
+ raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
+ while (subrangeItr)
+ {
+ if (subrange == subrangeItr)
+ {
+ // next
+ subrangeItr = subrangeItr->link_allSegmentRanges.next;
+ continue;
+ }
+ if (subrange->interval.IsOverlapping(subrangeItr->interval))
+ {
+ if (subrangeItr->GetPhysicalRegister() >= 0)
+ physRegSet.SetReserved(subrangeItr->GetPhysicalRegister());
+ }
+ // next
+ subrangeItr = subrangeItr->link_allSegmentRanges.next;
+ }
+ }
+}
+
+bool _livenessRangeStartCompare(raLivenessRange* lhs, raLivenessRange* rhs)
+{
+ return lhs->interval.start < rhs->interval.start;
+}
+
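+// rebuild the segment's linked list of subranges so that it is ordered by ascending interval start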
+void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment)
+{
+ raLivenessRange* subrangeList[4096 + 1];
+ sint32 count = 0;
+ // disassemble linked list
+ raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
+ while (subrangeItr)
+ {
+ cemu_assert(count < 4096);
+ subrangeList[count] = subrangeItr;
+ count++;
+ // next
+ subrangeItr = subrangeItr->link_allSegmentRanges.next;
+ }
+ if (count == 0)
+ {
+ imlSegment->raInfo.linkedList_allSubranges = nullptr;
+ return;
+ }
+ // sort
+ std::sort(subrangeList, subrangeList + count, _livenessRangeStartCompare);
+ // reassemble linked list
+ subrangeList[count] = nullptr;
+ imlSegment->raInfo.linkedList_allSubranges = subrangeList[0];
+ subrangeList[0]->link_allSegmentRanges.prev = nullptr;
+ subrangeList[0]->link_allSegmentRanges.next = subrangeList[1];
+ for (sint32 i = 1; i < count; i++)
+ {
+ subrangeList[i]->link_allSegmentRanges.prev = subrangeList[i - 1];
+ subrangeList[i]->link_allSegmentRanges.next = subrangeList[i + 1];
+ }
+ // validate list
+#if DEBUG_RA_EXTRA_VALIDATION
+ sint32 count2 = 0;
+ subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
+ raInstructionEdge currentStartPosition;
+ currentStartPosition.SetRaw(RA_INTER_RANGE_START);
+ while (subrangeItr)
+ {
+ count2++;
+ if (subrangeItr->interval2.start < currentStartPosition)
+ assert_dbg();
+ currentStartPosition = subrangeItr->interval2.start;
+ // next
+ subrangeItr = subrangeItr->link_allSegmentRanges.next;
+ }
+ if (count != count2)
+ assert_dbg();
+#endif
+}
+
+std::unordered_map<IMLRegID, raLivenessRange*>& IMLRA_GetSubrangeMap(IMLSegment* imlSegment)
+{
+ return imlSegment->raInfo.linkedList_perVirtualRegister;
+}
+
+raLivenessRange* IMLRA_GetSubrange(IMLSegment* imlSegment, IMLRegID regId)
+{
+ auto it = imlSegment->raInfo.linkedList_perVirtualRegister.find(regId);
+ if (it == imlSegment->raInfo.linkedList_perVirtualRegister.end())
+ return nullptr;
+ return it->second;
+}
+
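+// fixed register requirement together with the virtual register it belongs to (IMLRegID_INVALID for pure reservations)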
+struct raFixedRegRequirementWithVGPR
+{
+ raFixedRegRequirementWithVGPR(raInstructionEdge pos, IMLPhysRegisterSet allowedReg, IMLRegID regId)
+ : pos(pos), allowedReg(allowedReg), regId(regId) {}
+
+ raInstructionEdge pos;
+ IMLPhysRegisterSet allowedReg;
+ IMLRegID regId;
+};
+
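+// collect all fixed register requirements of the segment's instructions, tagged with the instruction edge (input or output) they apply to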
+std::vector<raFixedRegRequirementWithVGPR> IMLRA_BuildSegmentInstructionFixedRegList(IMLSegment* imlSegment)
+{
+ std::vector<raFixedRegRequirementWithVGPR> frrList;
+ size_t index = 0;
+ while (index < imlSegment->imlList.size())
+ {
+ IMLFixedRegisters fixedRegs;
+ GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs);
+ raInstructionEdge pos;
+ pos.Set(index, true);
+ for (auto& fixedRegAccess : fixedRegs.listInput)
+ {
+ frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid() ? fixedRegAccess.reg.GetRegID() : IMLRegID_INVALID);
+ }
+ pos = pos + 1;
+ for (auto& fixedRegAccess : fixedRegs.listOutput)
+ {
+ frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid() ? fixedRegAccess.reg.GetRegID() : IMLRegID_INVALID);
+ }
+ index++;
+ }
+ return frrList;
+}
+
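+// collect all ranges in this segment which overlap pos and whose allowed-register set includes physReg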
+boost::container::small_vector<raLivenessRange*, 8> IMLRA_GetRangeWithFixedRegReservationOverlappingPos(IMLSegment* imlSegment, raInstructionEdge pos, IMLPhysReg physReg)
+{
+ boost::container::small_vector<raLivenessRange*, 8> rangeList;
+ for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
+ {
+ if (!currentRange->interval.ContainsEdge(pos))
+ continue;
+ IMLPhysRegisterSet allowedRegs;
+ if (!currentRange->GetAllowedRegistersEx(allowedRegs))
+ continue;
+ if (allowedRegs.IsAvailable(physReg))
+ rangeList.emplace_back(currentRange);
+ }
+ return rangeList;
+}
+
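+// split and constrain ranges so that every fixed register requirement within this segment can be honored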
+void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
+{
+ // first pass - iterate over all ranges with fixed register requirements and split them if they cross the segment border
+ // todo - this pass currently creates suboptimal results by splitting all ranges that cross the segment border if they have any fixed register requirement. This can be avoided in some cases
+ for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange;)
+ {
+ IMLPhysRegisterSet allowedRegs;
+ if(currentRange->list_fixedRegRequirements.empty())
+ {
+ currentRange = currentRange->link_allSegmentRanges.next;
+ continue; // since we run this pass for every segment we don't need to do global checks here for clusters which may not even have fixed register requirements
+ }
+ if (!currentRange->GetAllowedRegistersEx(allowedRegs))
+ {
+ currentRange = currentRange->link_allSegmentRanges.next;
+ continue;
+ }
+ if (currentRange->interval.ExtendsPreviousSegment() || currentRange->interval.ExtendsIntoNextSegment())
+ {
+ raLivenessRange* nextRange = currentRange->link_allSegmentRanges.next;
+ IMLRA_ExplodeRangeCluster(ppcImlGenContext, currentRange);
+ currentRange = nextRange;
+ continue;
+ }
+ currentRange = currentRange->link_allSegmentRanges.next;
+ }
+ // second pass - look for ranges with conflicting fixed register requirements and split these too (locally)
+ for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
+ {
+ IMLPhysRegisterSet allowedRegs;
+ if (currentRange->list_fixedRegRequirements.empty())
+ continue; // we don't need to check whole clusters because the pass above guarantees that there are no ranges with fixed register requirements that extend outside of this segment
+ if (!currentRange->GetAllowedRegistersEx(allowedRegs))
+ continue;
+ if (allowedRegs.HasAnyAvailable())
+ continue;
+ cemu_assert_unimplemented();
+ }
+ // third pass - assign fixed registers, split ranges if needed
+ std::vector