diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index e798c1a7..6ae4b892 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -177,9 +177,6 @@ jobs:
build-macos:
runs-on: macos-14
- strategy:
- matrix:
- arch: [x86_64, arm64]
steps:
- name: "Checkout repo"
uses: actions/checkout@v4
@@ -205,7 +202,7 @@ jobs:
- name: "Install molten-vk"
run: |
- curl -L -O https://github.com/KhronosGroup/MoltenVK/releases/download/v1.3.0/MoltenVK-macos.tar
+ curl -L -O https://github.com/KhronosGroup/MoltenVK/releases/download/v1.2.9/MoltenVK-macos.tar
tar xf MoltenVK-macos.tar
sudo mkdir -p /usr/local/lib
sudo cp MoltenVK/MoltenVK/dynamic/dylib/macOS/libMoltenVK.dylib /usr/local/lib
@@ -239,7 +236,7 @@ jobs:
cd build
cmake .. ${{ env.BUILD_FLAGS }} \
-DCMAKE_BUILD_TYPE=${{ env.BUILD_MODE }} \
- -DCMAKE_OSX_ARCHITECTURES=${{ matrix.arch }} \
+ -DCMAKE_OSX_ARCHITECTURES=x86_64 \
-DMACOS_BUNDLE=ON \
-G Ninja
@@ -262,5 +259,5 @@ jobs:
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
- name: cemu-bin-macos-${{ matrix.arch }}
+ name: cemu-bin-macos-x64
path: ./bin/Cemu.dmg
diff --git a/.gitmodules b/.gitmodules
index 8f9772d3..dc69c441 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -18,6 +18,3 @@
path = dependencies/imgui
url = https://github.com/ocornut/imgui
shallow = true
-[submodule "dependencies/xbyak_aarch64"]
- path = dependencies/xbyak_aarch64
- url = https://github.com/fujitsu/xbyak_aarch64
diff --git a/CMakeLists.txt b/CMakeLists.txt
index aa491b9e..560728f2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -166,7 +166,7 @@ if (UNIX AND NOT APPLE)
if(ENABLE_BLUEZ)
find_package(bluez REQUIRED)
- set(SUPPORTS_WIIMOTE ON)
+ set(ENABLE_WIIMOTE ON)
add_compile_definitions(HAS_BLUEZ)
endif()
@@ -188,7 +188,7 @@ endif()
if (ENABLE_HIDAPI)
find_package(hidapi REQUIRED)
- set(SUPPORTS_WIIMOTE ON)
+ set(ENABLE_WIIMOTE ON)
add_compile_definitions(HAS_HIDAPI)
endif ()
@@ -222,18 +222,9 @@ endif()
add_subdirectory("dependencies/ih264d" EXCLUDE_FROM_ALL)
-if (CMAKE_OSX_ARCHITECTURES)
- set(CEMU_ARCHITECTURE ${CMAKE_OSX_ARCHITECTURES})
-else()
- set(CEMU_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR})
-endif()
-if(CEMU_ARCHITECTURE MATCHES "(aarch64)|(AARCH64)|(arm64)|(ARM64)")
- add_subdirectory("dependencies/xbyak_aarch64" EXCLUDE_FROM_ALL)
-endif()
-
find_package(ZArchive)
if (NOT ZArchive_FOUND)
add_subdirectory("dependencies/ZArchive" EXCLUDE_FROM_ALL)
endif()
-add_subdirectory(src)
\ No newline at end of file
+add_subdirectory(src)
diff --git a/boost.natvis b/boost.natvis
deleted file mode 100644
index 2781a585..00000000
--- a/boost.natvis
+++ /dev/null
@@ -1,26 +0,0 @@
-
-
-
-
-
- - m_holder.m_size
-
- m_holder.m_size
- m_holder.m_start
-
-
-
-
-
- {{ size={m_holder.m_size} }}
-
- - m_holder.m_size
- - static_capacity
-
- m_holder.m_size
- ($T1*)m_holder.storage.data
-
-
-
-
-
diff --git a/dependencies/ih264d/CMakeLists.txt b/dependencies/ih264d/CMakeLists.txt
index 64ac0931..686a9d08 100644
--- a/dependencies/ih264d/CMakeLists.txt
+++ b/dependencies/ih264d/CMakeLists.txt
@@ -183,9 +183,6 @@ target_sources(ih264d PRIVATE
"decoder/arm/ih264d_function_selector.c"
)
target_compile_options(ih264d PRIVATE -DARMV8)
-if(APPLE)
- target_sources(ih264d PRIVATE "common/armv8/macos_arm_symbol_aliases.s")
-endif()
else()
message(FATAL_ERROR "ih264d unknown architecture: ${IH264D_ARCHITECTURE}")
endif()
diff --git a/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s b/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s
index c0d9cf99..39c02560 100644
--- a/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s
+++ b/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s
@@ -429,13 +429,8 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8:
rev64 v7.4h, v2.4h
ld1 {v3.2s}, [x10]
sub x5, x3, #8
-#ifdef __APPLE__
- adrp x12, _ih264_gai1_intrapred_chroma_plane_coeffs1@GOTPAGE
- ldr x12, [x12, _ih264_gai1_intrapred_chroma_plane_coeffs1@GOTPAGEOFF]
-#else
adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs1
ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs1]
-#endif
usubl v10.8h, v5.8b, v1.8b
ld1 {v8.8b, v9.8b}, [x12] // Load multiplication factors 1 to 8 into D3
mov v8.d[1], v9.d[0]
@@ -489,13 +484,10 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8:
zip1 v1.8h, v0.8h, v2.8h
zip2 v2.8h, v0.8h, v2.8h
mov v0.16b, v1.16b
-#ifdef __APPLE__
- adrp x12, _ih264_gai1_intrapred_chroma_plane_coeffs2@GOTPAGE
- ldr x12, [x12, _ih264_gai1_intrapred_chroma_plane_coeffs2@GOTPAGEOFF]
-#else
+
adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs2
ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs2]
-#endif
+
ld1 {v8.2s, v9.2s}, [x12]
mov v8.d[1], v9.d[0]
mov v10.16b, v8.16b
diff --git a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s
index 2422d8cd..fa19c121 100644
--- a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s
+++ b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s
@@ -431,13 +431,10 @@ ih264_intra_pred_luma_16x16_mode_plane_av8:
mov x10, x1 //top_left
mov x4, #-1
ld1 {v2.2s}, [x1], x8
-#ifdef __APPLE__
- adrp x7, _ih264_gai1_intrapred_luma_plane_coeffs@GOTPAGE
- ldr x7, [x7, _ih264_gai1_intrapred_luma_plane_coeffs@GOTPAGEOFF]
-#else
+
adrp x7, :got:ih264_gai1_intrapred_luma_plane_coeffs
ldr x7, [x7, #:got_lo12:ih264_gai1_intrapred_luma_plane_coeffs]
-#endif
+
ld1 {v0.2s}, [x1]
rev64 v2.8b, v2.8b
ld1 {v6.2s, v7.2s}, [x7]
diff --git a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s
index 6fa31ded..273aa81b 100644
--- a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s
+++ b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s
@@ -1029,13 +1029,9 @@ ih264_intra_pred_luma_8x8_mode_horz_u_av8:
mov v3.d[0], v2.d[1]
ext v4.16b, v2.16b , v2.16b , #1
mov v5.d[0], v4.d[1]
-#ifdef __APPLE__
- adrp x12, _ih264_gai1_intrapred_luma_8x8_horz_u@GOTPAGE
- ldr x12, [x12, _ih264_gai1_intrapred_luma_8x8_horz_u@GOTPAGEOFF]
-#else
+
adrp x12, :got:ih264_gai1_intrapred_luma_8x8_horz_u
ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_luma_8x8_horz_u]
-#endif
uaddl v20.8h, v0.8b, v2.8b
uaddl v22.8h, v1.8b, v3.8b
uaddl v24.8h, v2.8b, v4.8b
diff --git a/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s b/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s
index 8d6aa995..475f690e 100644
--- a/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s
+++ b/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s
@@ -142,22 +142,14 @@ ih264_weighted_bi_pred_luma_av8:
sxtw x4, w4
sxtw x5, w5
stp x19, x20, [sp, #-16]!
-#ifndef __APPLE__
ldr w8, [sp, #80] //Load wt2 in w8
ldr w9, [sp, #88] //Load ofst1 in w9
- ldr w10, [sp, #96] //Load ofst2 in w10
- ldr w11, [sp, #104] //Load ht in w11
- ldr w12, [sp, #112] //Load wd in w12
-#else
- ldr w8, [sp, #80] //Load wt2 in w8
- ldr w9, [sp, #84] //Load ofst1 in w9
- ldr w10, [sp, #88] //Load ofst2 in w10
- ldr w11, [sp, #92] //Load ht in w11
- ldr w12, [sp, #96] //Load wd in w12
-#endif
add w6, w6, #1 //w6 = log_WD + 1
neg w10, w6 //w10 = -(log_WD + 1)
dup v0.8h, w10 //Q0 = -(log_WD + 1) (32-bit)
+ ldr w10, [sp, #96] //Load ofst2 in w10
+ ldr w11, [sp, #104] //Load ht in w11
+ ldr w12, [sp, #112] //Load wd in w12
add w9, w9, #1 //w9 = ofst1 + 1
add w9, w9, w10 //w9 = ofst1 + ofst2 + 1
mov v2.s[0], w7
@@ -432,24 +424,17 @@ ih264_weighted_bi_pred_chroma_av8:
sxtw x5, w5
stp x19, x20, [sp, #-16]!
-#ifndef __APPLE__
+
ldr w8, [sp, #80] //Load wt2 in w8
- ldr w9, [sp, #88] //Load ofst1 in w9
- ldr w10, [sp, #96] //Load ofst2 in w10
- ldr w11, [sp, #104] //Load ht in w11
- ldr w12, [sp, #112] //Load wd in w12
-#else
- ldr w8, [sp, #80] //Load wt2 in w8
- ldr w9, [sp, #84] //Load ofst1 in w9
- ldr w10, [sp, #88] //Load ofst2 in w10
- ldr w11, [sp, #92] //Load ht in w11
- ldr w12, [sp, #96] //Load wd in w12
-#endif
dup v4.4s, w8 //Q2 = (wt2_u, wt2_v) (32-bit)
dup v2.4s, w7 //Q1 = (wt1_u, wt1_v) (32-bit)
add w6, w6, #1 //w6 = log_WD + 1
+ ldr w9, [sp, #88] //Load ofst1 in w9
+ ldr w10, [sp, #96] //Load ofst2 in w10
neg w20, w6 //w20 = -(log_WD + 1)
dup v0.8h, w20 //Q0 = -(log_WD + 1) (16-bit)
+ ldr w11, [sp, #104] //Load ht in x11
+ ldr w12, [sp, #112] //Load wd in x12
dup v20.8h, w9 //0ffset1
dup v21.8h, w10 //0ffset2
srhadd v6.8b, v20.8b, v21.8b
diff --git a/dependencies/ih264d/common/armv8/macos_arm_symbol_aliases.s b/dependencies/ih264d/common/armv8/macos_arm_symbol_aliases.s
deleted file mode 100644
index 3639f1b3..00000000
--- a/dependencies/ih264d/common/armv8/macos_arm_symbol_aliases.s
+++ /dev/null
@@ -1,185 +0,0 @@
-// macOS clang compilers append preceding underscores to function names, this is to prevent
-// mismatches with the assembly function names and the C functions as defined in the header.
-
-.global _ih264_deblk_chroma_horz_bs4_av8
-_ih264_deblk_chroma_horz_bs4_av8 = ih264_deblk_chroma_horz_bs4_av8
-
-.global _ih264_deblk_chroma_horz_bslt4_av8
-_ih264_deblk_chroma_horz_bslt4_av8 = ih264_deblk_chroma_horz_bslt4_av8
-
-.global _ih264_deblk_chroma_vert_bs4_av8
-_ih264_deblk_chroma_vert_bs4_av8 = ih264_deblk_chroma_vert_bs4_av8
-
-.global _ih264_deblk_chroma_vert_bslt4_av8
-_ih264_deblk_chroma_vert_bslt4_av8 = ih264_deblk_chroma_vert_bslt4_av8
-
-.global _ih264_deblk_luma_horz_bs4_av8
-_ih264_deblk_luma_horz_bs4_av8 = ih264_deblk_luma_horz_bs4_av8
-
-.global _ih264_deblk_luma_horz_bslt4_av8
-_ih264_deblk_luma_horz_bslt4_av8 = ih264_deblk_luma_horz_bslt4_av8
-
-.global _ih264_deblk_luma_vert_bs4_av8
-_ih264_deblk_luma_vert_bs4_av8 = ih264_deblk_luma_vert_bs4_av8
-
-.global _ih264_deblk_luma_vert_bslt4_av8
-_ih264_deblk_luma_vert_bslt4_av8 = ih264_deblk_luma_vert_bslt4_av8
-
-.global _ih264_default_weighted_pred_chroma_av8
-_ih264_default_weighted_pred_chroma_av8 = ih264_default_weighted_pred_chroma_av8
-
-.global _ih264_default_weighted_pred_luma_av8
-_ih264_default_weighted_pred_luma_av8 = ih264_default_weighted_pred_luma_av8
-
-.global _ih264_ihadamard_scaling_4x4_av8
-_ih264_ihadamard_scaling_4x4_av8 = ih264_ihadamard_scaling_4x4_av8
-
-.global _ih264_inter_pred_chroma_av8
-_ih264_inter_pred_chroma_av8 = ih264_inter_pred_chroma_av8
-
-.global _ih264_inter_pred_luma_copy_av8
-_ih264_inter_pred_luma_copy_av8 = ih264_inter_pred_luma_copy_av8
-
-.global _ih264_inter_pred_luma_horz_av8
-_ih264_inter_pred_luma_horz_av8 = ih264_inter_pred_luma_horz_av8
-
-.global _ih264_inter_pred_luma_horz_hpel_vert_hpel_av8
-_ih264_inter_pred_luma_horz_hpel_vert_hpel_av8 = ih264_inter_pred_luma_horz_hpel_vert_hpel_av8
-
-.global _ih264_inter_pred_luma_horz_hpel_vert_qpel_av8
-_ih264_inter_pred_luma_horz_hpel_vert_qpel_av8 = ih264_inter_pred_luma_horz_hpel_vert_qpel_av8
-
-.global _ih264_inter_pred_luma_horz_qpel_av8
-_ih264_inter_pred_luma_horz_qpel_av8 = ih264_inter_pred_luma_horz_qpel_av8
-
-.global _ih264_inter_pred_luma_horz_qpel_vert_hpel_av8
-_ih264_inter_pred_luma_horz_qpel_vert_hpel_av8 = ih264_inter_pred_luma_horz_qpel_vert_hpel_av8
-
-.global _ih264_inter_pred_luma_horz_qpel_vert_qpel_av8
-_ih264_inter_pred_luma_horz_qpel_vert_qpel_av8 = ih264_inter_pred_luma_horz_qpel_vert_qpel_av8
-
-.global _ih264_inter_pred_luma_vert_av8
-_ih264_inter_pred_luma_vert_av8 = ih264_inter_pred_luma_vert_av8
-
-.global _ih264_inter_pred_luma_vert_qpel_av8
-_ih264_inter_pred_luma_vert_qpel_av8 = ih264_inter_pred_luma_vert_qpel_av8
-
-.global _ih264_intra_pred_chroma_8x8_mode_horz_av8
-_ih264_intra_pred_chroma_8x8_mode_horz_av8 = ih264_intra_pred_chroma_8x8_mode_horz_av8
-
-.global _ih264_intra_pred_chroma_8x8_mode_plane_av8
-_ih264_intra_pred_chroma_8x8_mode_plane_av8 = ih264_intra_pred_chroma_8x8_mode_plane_av8
-
-.global _ih264_intra_pred_chroma_8x8_mode_vert_av8
-_ih264_intra_pred_chroma_8x8_mode_vert_av8 = ih264_intra_pred_chroma_8x8_mode_vert_av8
-
-.global _ih264_intra_pred_luma_16x16_mode_dc_av8
-_ih264_intra_pred_luma_16x16_mode_dc_av8 = ih264_intra_pred_luma_16x16_mode_dc_av8
-
-.global _ih264_intra_pred_luma_16x16_mode_horz_av8
-_ih264_intra_pred_luma_16x16_mode_horz_av8 = ih264_intra_pred_luma_16x16_mode_horz_av8
-
-.global _ih264_intra_pred_luma_16x16_mode_plane_av8
-_ih264_intra_pred_luma_16x16_mode_plane_av8 = ih264_intra_pred_luma_16x16_mode_plane_av8
-
-.global _ih264_intra_pred_luma_16x16_mode_vert_av8
-_ih264_intra_pred_luma_16x16_mode_vert_av8 = ih264_intra_pred_luma_16x16_mode_vert_av8
-
-.global _ih264_intra_pred_luma_4x4_mode_dc_av8
-_ih264_intra_pred_luma_4x4_mode_dc_av8 = ih264_intra_pred_luma_4x4_mode_dc_av8
-
-.global _ih264_intra_pred_luma_4x4_mode_diag_dl_av8
-_ih264_intra_pred_luma_4x4_mode_diag_dl_av8 = ih264_intra_pred_luma_4x4_mode_diag_dl_av8
-
-.global _ih264_intra_pred_luma_4x4_mode_diag_dr_av8
-_ih264_intra_pred_luma_4x4_mode_diag_dr_av8 = ih264_intra_pred_luma_4x4_mode_diag_dr_av8
-
-.global _ih264_intra_pred_luma_4x4_mode_horz_av8
-_ih264_intra_pred_luma_4x4_mode_horz_av8 = ih264_intra_pred_luma_4x4_mode_horz_av8
-
-.global _ih264_intra_pred_luma_4x4_mode_horz_d_av8
-_ih264_intra_pred_luma_4x4_mode_horz_d_av8 = ih264_intra_pred_luma_4x4_mode_horz_d_av8
-
-.global _ih264_intra_pred_luma_4x4_mode_horz_u_av8
-_ih264_intra_pred_luma_4x4_mode_horz_u_av8 = ih264_intra_pred_luma_4x4_mode_horz_u_av8
-
-.global _ih264_intra_pred_luma_4x4_mode_vert_av8
-_ih264_intra_pred_luma_4x4_mode_vert_av8 = ih264_intra_pred_luma_4x4_mode_vert_av8
-
-.global _ih264_intra_pred_luma_4x4_mode_vert_l_av8
-_ih264_intra_pred_luma_4x4_mode_vert_l_av8 = ih264_intra_pred_luma_4x4_mode_vert_l_av8
-
-.global _ih264_intra_pred_luma_4x4_mode_vert_r_av8
-_ih264_intra_pred_luma_4x4_mode_vert_r_av8 = ih264_intra_pred_luma_4x4_mode_vert_r_av8
-
-.global _ih264_intra_pred_luma_8x8_mode_dc_av8
-_ih264_intra_pred_luma_8x8_mode_dc_av8 = ih264_intra_pred_luma_8x8_mode_dc_av8
-
-.global _ih264_intra_pred_luma_8x8_mode_diag_dl_av8
-_ih264_intra_pred_luma_8x8_mode_diag_dl_av8 = ih264_intra_pred_luma_8x8_mode_diag_dl_av8
-
-.global _ih264_intra_pred_luma_8x8_mode_diag_dr_av8
-_ih264_intra_pred_luma_8x8_mode_diag_dr_av8 = ih264_intra_pred_luma_8x8_mode_diag_dr_av8
-
-.global _ih264_intra_pred_luma_8x8_mode_horz_av8
-_ih264_intra_pred_luma_8x8_mode_horz_av8 = ih264_intra_pred_luma_8x8_mode_horz_av8
-
-.global _ih264_intra_pred_luma_8x8_mode_horz_d_av8
-_ih264_intra_pred_luma_8x8_mode_horz_d_av8 = ih264_intra_pred_luma_8x8_mode_horz_d_av8
-
-.global _ih264_intra_pred_luma_8x8_mode_horz_u_av8
-_ih264_intra_pred_luma_8x8_mode_horz_u_av8 = ih264_intra_pred_luma_8x8_mode_horz_u_av8
-
-.global _ih264_intra_pred_luma_8x8_mode_vert_av8
-_ih264_intra_pred_luma_8x8_mode_vert_av8 = ih264_intra_pred_luma_8x8_mode_vert_av8
-
-.global _ih264_intra_pred_luma_8x8_mode_vert_l_av8
-_ih264_intra_pred_luma_8x8_mode_vert_l_av8 = ih264_intra_pred_luma_8x8_mode_vert_l_av8
-
-.global _ih264_intra_pred_luma_8x8_mode_vert_r_av8
-_ih264_intra_pred_luma_8x8_mode_vert_r_av8 = ih264_intra_pred_luma_8x8_mode_vert_r_av8
-
-.global _ih264_iquant_itrans_recon_4x4_av8
-_ih264_iquant_itrans_recon_4x4_av8 = ih264_iquant_itrans_recon_4x4_av8
-
-.global _ih264_iquant_itrans_recon_4x4_dc_av8
-_ih264_iquant_itrans_recon_4x4_dc_av8 = ih264_iquant_itrans_recon_4x4_dc_av8
-
-.global _ih264_iquant_itrans_recon_8x8_av8
-_ih264_iquant_itrans_recon_8x8_av8 = ih264_iquant_itrans_recon_8x8_av8
-
-.global _ih264_iquant_itrans_recon_8x8_dc_av8
-_ih264_iquant_itrans_recon_8x8_dc_av8 = ih264_iquant_itrans_recon_8x8_dc_av8
-
-.global _ih264_iquant_itrans_recon_chroma_4x4_av8
-_ih264_iquant_itrans_recon_chroma_4x4_av8 = ih264_iquant_itrans_recon_chroma_4x4_av8
-
-.global _ih264_iquant_itrans_recon_chroma_4x4_dc_av8
-_ih264_iquant_itrans_recon_chroma_4x4_dc_av8 = ih264_iquant_itrans_recon_chroma_4x4_dc_av8
-
-.global _ih264_pad_left_chroma_av8
-_ih264_pad_left_chroma_av8 = ih264_pad_left_chroma_av8
-
-.global _ih264_pad_left_luma_av8
-_ih264_pad_left_luma_av8 = ih264_pad_left_luma_av8
-
-.global _ih264_pad_right_chroma_av8
-_ih264_pad_right_chroma_av8 = ih264_pad_right_chroma_av8
-
-.global _ih264_pad_right_luma_av8
-_ih264_pad_right_luma_av8 = ih264_pad_right_luma_av8
-
-.global _ih264_pad_top_av8
-_ih264_pad_top_av8 = ih264_pad_top_av8
-
-.global _ih264_weighted_bi_pred_chroma_av8
-_ih264_weighted_bi_pred_chroma_av8 = ih264_weighted_bi_pred_chroma_av8
-
-.global _ih264_weighted_bi_pred_luma_av8
-_ih264_weighted_bi_pred_luma_av8 = ih264_weighted_bi_pred_luma_av8
-
-.global _ih264_weighted_pred_chroma_av8
-_ih264_weighted_pred_chroma_av8 = ih264_weighted_pred_chroma_av8
-
-.global _ih264_weighted_pred_luma_av8
-_ih264_weighted_pred_luma_av8 = ih264_weighted_pred_luma_av8
\ No newline at end of file
diff --git a/dependencies/vcpkg b/dependencies/vcpkg
index 533a5fda..a4275b7e 160000
--- a/dependencies/vcpkg
+++ b/dependencies/vcpkg
@@ -1 +1 @@
-Subproject commit 533a5fda5c0646d1771345fb572e759283444d5f
+Subproject commit a4275b7eee79fb24ec2e135481ef5fce8b41c339
diff --git a/dependencies/xbyak_aarch64 b/dependencies/xbyak_aarch64
deleted file mode 160000
index 904b8923..00000000
--- a/dependencies/xbyak_aarch64
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 904b8923457f3ec0d6f82ea2d6832a792851194d
diff --git a/dist/linux/info.cemu.Cemu.desktop b/dist/linux/info.cemu.Cemu.desktop
index 6eeb0120..5003d4a6 100644
--- a/dist/linux/info.cemu.Cemu.desktop
+++ b/dist/linux/info.cemu.Cemu.desktop
@@ -24,4 +24,3 @@ Comment[it]=Software per emulare giochi e applicazioni per Wii U su PC
Categories=Game;Emulator;
Keywords=Nintendo;
MimeType=application/x-wii-u-rom;
-StartupWMClass=Cemu
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 04b6dfdd..79471321 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -49,6 +49,7 @@ add_subdirectory(audio)
add_subdirectory(util)
add_subdirectory(imgui)
add_subdirectory(resource)
+add_subdirectory(asm)
add_executable(CemuBin
main.cpp
@@ -101,21 +102,13 @@ if (MACOS_BUNDLE)
endforeach(folder)
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
- set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/debug/lib/libusb-1.0.0.dylib")
+ set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/x64-osx/debug/lib/libusb-1.0.0.dylib")
else()
- set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib/libusb-1.0.0.dylib")
+ set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/x64-osx/lib/libusb-1.0.0.dylib")
endif()
- if (EXISTS "/usr/local/lib/libMoltenVK.dylib")
- set(MOLTENVK_PATH "/usr/local/lib/libMoltenVK.dylib")
- elseif (EXISTS "/opt/homebrew/lib/libMoltenVK.dylib")
- set(MOLTENVK_PATH "/opt/homebrew/lib/libMoltenVK.dylib")
- else()
- message(FATAL_ERROR "failed to find libMoltenVK.dylib")
- endif ()
-
add_custom_command (TARGET CemuBin POST_BUILD
- COMMAND ${CMAKE_COMMAND} ARGS -E copy "${MOLTENVK_PATH}" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libMoltenVK.dylib"
+ COMMAND ${CMAKE_COMMAND} ARGS -E copy "/usr/local/lib/libMoltenVK.dylib" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libMoltenVK.dylib"
COMMAND ${CMAKE_COMMAND} ARGS -E copy "${LIBUSB_PATH}" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libusb-1.0.0.dylib"
COMMAND ${CMAKE_COMMAND} ARGS -E copy "${CMAKE_SOURCE_DIR}/src/resource/update.sh" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/MacOS/update.sh"
COMMAND bash -c "install_name_tool -add_rpath @executable_path/../Frameworks ${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/MacOS/${OUTPUT_NAME}"
diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt
index 2900059b..d51d58d5 100644
--- a/src/Cafe/CMakeLists.txt
+++ b/src/Cafe/CMakeLists.txt
@@ -67,31 +67,24 @@ add_library(CemuCafe
HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h
HW/Espresso/Recompiler/PPCRecompiler.cpp
HW/Espresso/Recompiler/PPCRecompiler.h
- HW/Espresso/Recompiler/IML/IML.h
- HW/Espresso/Recompiler/IML/IMLSegment.cpp
- HW/Espresso/Recompiler/IML/IMLSegment.h
- HW/Espresso/Recompiler/IML/IMLInstruction.cpp
- HW/Espresso/Recompiler/IML/IMLInstruction.h
- HW/Espresso/Recompiler/IML/IMLDebug.cpp
- HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp
- HW/Espresso/Recompiler/IML/IMLOptimizer.cpp
- HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp
- HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h
- HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp
- HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h
+ HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp
HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp
HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp
HW/Espresso/Recompiler/PPCRecompilerIml.h
+ HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp
+ HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp
+ HW/Espresso/Recompiler/PPCRecompilerImlRanges.h
+ HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator2.cpp
+ HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp
HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp
- HW/Espresso/Recompiler/BackendX64/BackendX64AVX.cpp
- HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp
- HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
- HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp
- HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp
- HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp
- HW/Espresso/Recompiler/BackendX64/BackendX64.h
- HW/Espresso/Recompiler/BackendX64/X64Emit.hpp
- HW/Espresso/Recompiler/BackendX64/x86Emitter.h
+ HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp
+ HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp
+ HW/Espresso/Recompiler/PPCRecompilerX64.cpp
+ HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp
+ HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp
+ HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp
+ HW/Espresso/Recompiler/PPCRecompilerX64.h
+ HW/Espresso/Recompiler/x64Emit.hpp
HW/Latte/Common/RegisterSerializer.cpp
HW/Latte/Common/RegisterSerializer.h
HW/Latte/Common/ShaderSerializer.cpp
@@ -476,10 +469,6 @@ add_library(CemuCafe
OS/libs/nsyshid/Infinity.h
OS/libs/nsyshid/Skylander.cpp
OS/libs/nsyshid/Skylander.h
- OS/libs/nsyshid/SkylanderXbox360.cpp
- OS/libs/nsyshid/SkylanderXbox360.h
- OS/libs/nsyshid/g721/g721.cpp
- OS/libs/nsyshid/g721/g721.h
OS/libs/nsyskbd/nsyskbd.cpp
OS/libs/nsyskbd/nsyskbd.h
OS/libs/nsysnet/nsysnet.cpp
@@ -537,14 +526,6 @@ if(APPLE)
target_sources(CemuCafe PRIVATE "HW/Latte/Renderer/Vulkan/CocoaSurface.mm")
endif()
-if(CEMU_ARCHITECTURE MATCHES "(aarch64)|(AARCH64)|(arm64)|(ARM64)")
- target_sources(CemuCafe PRIVATE
- HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp
- HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h
- )
- target_link_libraries(CemuCafe PRIVATE xbyak_aarch64)
-endif()
-
set_property(TARGET CemuCafe PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>")
target_include_directories(CemuCafe PUBLIC "../")
@@ -552,10 +533,11 @@ target_include_directories(CemuCafe PUBLIC "../")
if (glslang_VERSION VERSION_LESS "15.0.0")
set(glslang_target "glslang::SPIRV")
else()
- set(glslang_target "glslang::glslang")
+ set(glslang_target "glslang")
endif()
target_link_libraries(CemuCafe PRIVATE
+ CemuAsm
CemuAudio
CemuCommon
CemuComponents
diff --git a/src/Cafe/CafeSystem.cpp b/src/Cafe/CafeSystem.cpp
index d20ccd9d..1bf3755e 100644
--- a/src/Cafe/CafeSystem.cpp
+++ b/src/Cafe/CafeSystem.cpp
@@ -844,7 +844,7 @@ namespace CafeSystem
module->TitleStart();
cemu_initForGame();
// enter scheduler
- if ((ActiveSettings::GetCPUMode() == CPUMode::MulticoreRecompiler || LaunchSettings::ForceMultiCoreInterpreter()) && !LaunchSettings::ForceInterpreter())
+ if (ActiveSettings::GetCPUMode() == CPUMode::MulticoreRecompiler && !LaunchSettings::ForceInterpreter())
coreinit::OSSchedulerBegin(3);
else
coreinit::OSSchedulerBegin(1);
diff --git a/src/Cafe/Filesystem/FST/FST.cpp b/src/Cafe/Filesystem/FST/FST.cpp
index ec112b9a..f1255778 100644
--- a/src/Cafe/Filesystem/FST/FST.cpp
+++ b/src/Cafe/Filesystem/FST/FST.cpp
@@ -13,8 +13,6 @@
#define SET_FST_ERROR(__code) if (errorCodeOut) *errorCodeOut = ErrorCode::__code
-static_assert(sizeof(NCrypto::AesIv) == 16); // make sure IV is actually 16 bytes
-
class FSTDataSource
{
public:
@@ -870,7 +868,7 @@ static_assert(sizeof(FSTHashedBlock) == BLOCK_SIZE);
struct FSTCachedRawBlock
{
FSTRawBlock blockData;
- NCrypto::AesIv ivForNextBlock;
+ uint8 ivForNextBlock[16];
uint64 lastAccess;
};
@@ -921,13 +919,13 @@ void FSTVolume::TrimCacheIfRequired(FSTCachedRawBlock** droppedRawBlock, FSTCach
}
}
-void FSTVolume::DetermineUnhashedBlockIV(uint32 clusterIndex, uint32 blockIndex, NCrypto::AesIv& ivOut)
+void FSTVolume::DetermineUnhashedBlockIV(uint32 clusterIndex, uint32 blockIndex, uint8 ivOut[16])
{
- ivOut = {};
+ memset(ivOut, 0, 16); // ivOut is a pointer here (array parameter decays), so sizeof(ivOut) would not be 16
if(blockIndex == 0)
{
- ivOut.iv[0] = (uint8)(clusterIndex >> 8);
- ivOut.iv[1] = (uint8)(clusterIndex >> 0);
+ ivOut[0] = (uint8)(clusterIndex >> 8);
+ ivOut[1] = (uint8)(clusterIndex >> 0);
}
else
{
@@ -938,20 +936,20 @@ void FSTVolume::DetermineUnhashedBlockIV(uint32 clusterIndex, uint32 blockIndex,
auto itr = m_cacheDecryptedRawBlocks.find(cacheBlockId);
if (itr != m_cacheDecryptedRawBlocks.end())
{
- ivOut = itr->second->ivForNextBlock;
+ memcpy(ivOut, itr->second->ivForNextBlock, 16);
}
else
{
- cemu_assert(m_sectorSize >= NCrypto::AesIv::SIZE);
+ cemu_assert(m_sectorSize >= 16);
uint64 clusterOffset = (uint64)m_cluster[clusterIndex].offset * m_sectorSize;
- NCrypto::AesIv prevIV{};
- if (m_dataSource->readData(clusterIndex, clusterOffset, blockIndex * m_sectorSize - NCrypto::AesIv::SIZE, prevIV.iv, NCrypto::AesIv::SIZE) != NCrypto::AesIv::SIZE)
+ uint8 prevIV[16];
+ if (m_dataSource->readData(clusterIndex, clusterOffset, blockIndex * m_sectorSize - 16, prevIV, 16) != 16)
{
cemuLog_log(LogType::Force, "Failed to read IV for raw FST block");
m_detectedCorruption = true;
return;
}
- ivOut = prevIV;
+ memcpy(ivOut, prevIV, 16);
}
}
}
@@ -986,10 +984,10 @@ FSTCachedRawBlock* FSTVolume::GetDecryptedRawBlock(uint32 clusterIndex, uint32 b
return nullptr;
}
// decrypt hash data
- NCrypto::AesIv iv{};
+ uint8 iv[16]{};
DetermineUnhashedBlockIV(clusterIndex, blockIndex, iv);
- std::copy(block->blockData.rawData.data() + m_sectorSize - NCrypto::AesIv::SIZE, block->blockData.rawData.data() + m_sectorSize, block->ivForNextBlock.iv);
- AES128_CBC_decrypt(block->blockData.rawData.data(), block->blockData.rawData.data(), m_sectorSize, m_partitionTitlekey.b, iv.iv);
+ memcpy(block->ivForNextBlock, block->blockData.rawData.data() + m_sectorSize - 16, 16);
+ AES128_CBC_decrypt(block->blockData.rawData.data(), block->blockData.rawData.data(), m_sectorSize, m_partitionTitlekey.b, iv);
// if this is the next block, then hash it
if(cluster.hasContentHash)
{
diff --git a/src/Cafe/Filesystem/FST/FST.h b/src/Cafe/Filesystem/FST/FST.h
index 26201c32..601799ce 100644
--- a/src/Cafe/Filesystem/FST/FST.h
+++ b/src/Cafe/Filesystem/FST/FST.h
@@ -83,6 +83,7 @@ public:
}
private:
+
/* FST data (in memory) */
enum class ClusterHashMode : uint8
{
@@ -192,7 +193,7 @@ private:
std::unordered_map m_cacheDecryptedHashedBlocks;
uint64 m_cacheAccessCounter{};
- void DetermineUnhashedBlockIV(uint32 clusterIndex, uint32 blockIndex, NCrypto::AesIv& ivOut);
+ void DetermineUnhashedBlockIV(uint32 clusterIndex, uint32 blockIndex, uint8 ivOut[16]);
struct FSTCachedRawBlock* GetDecryptedRawBlock(uint32 clusterIndex, uint32 blockIndex);
struct FSTCachedHashedBlock* GetDecryptedHashedBlock(uint32 clusterIndex, uint32 blockIndex);
diff --git a/src/Cafe/GraphicPack/GraphicPack2.cpp b/src/Cafe/GraphicPack/GraphicPack2.cpp
index 6ae05c5b..f21bb89d 100644
--- a/src/Cafe/GraphicPack/GraphicPack2.cpp
+++ b/src/Cafe/GraphicPack/GraphicPack2.cpp
@@ -821,7 +821,7 @@ void GraphicPack2::AddConstantsForCurrentPreset(ExpressionParser& ep)
}
}
-void GraphicPack2::_iterateReplacedFiles(const fs::path& currentPath, bool isAOC, const char* virtualMountBase)
+void GraphicPack2::_iterateReplacedFiles(const fs::path& currentPath, bool isAOC)
{
uint64 currentTitleId = CafeSystem::GetForegroundTitleId();
uint64 aocTitleId = (currentTitleId & 0xFFFFFFFFull) | 0x0005000c00000000ull;
@@ -836,7 +836,7 @@ void GraphicPack2::_iterateReplacedFiles(const fs::path& currentPath, bool isAOC
}
else
{
- virtualMountPath = fs::path(virtualMountBase) / virtualMountPath;
+ virtualMountPath = fs::path("vol/content/") / virtualMountPath;
}
fscDeviceRedirect_add(virtualMountPath.generic_string(), it.file_size(), it.path().generic_string(), m_fs_priority);
}
@@ -861,7 +861,7 @@ void GraphicPack2::LoadReplacedFiles()
{
// setup redirections
fscDeviceRedirect_map();
- _iterateReplacedFiles(contentPath, false, "vol/content/");
+ _iterateReplacedFiles(contentPath, false);
}
// /aoc/
fs::path aocPath(gfxPackPath);
@@ -874,18 +874,7 @@ void GraphicPack2::LoadReplacedFiles()
aocTitleId |= 0x0005000c00000000ULL;
// setup redirections
fscDeviceRedirect_map();
- _iterateReplacedFiles(aocPath, true, nullptr);
- }
-
- // /code/
- fs::path codePath(gfxPackPath);
- codePath.append("code");
-
- if (fs::exists(codePath, ec))
- {
- // setup redirections
- fscDeviceRedirect_map();
- _iterateReplacedFiles(codePath, false, CafeSystem::GetInternalVirtualCodeFolder().c_str());
+ _iterateReplacedFiles(aocPath, true);
}
}
diff --git a/src/Cafe/GraphicPack/GraphicPack2.h b/src/Cafe/GraphicPack/GraphicPack2.h
index fc9603cd..9b6a86d4 100644
--- a/src/Cafe/GraphicPack/GraphicPack2.h
+++ b/src/Cafe/GraphicPack/GraphicPack2.h
@@ -260,7 +260,7 @@ private:
CustomShader LoadShader(const fs::path& path, uint64 shader_base_hash, uint64 shader_aux_hash, GP_SHADER_TYPE shader_type) const;
void ApplyShaderPresets(std::string& shader_source) const;
void LoadReplacedFiles();
- void _iterateReplacedFiles(const fs::path& currentPath, bool isAOC, const char* virtualMountBase);
+ void _iterateReplacedFiles(const fs::path& currentPath, bool isAOC);
// ram mappings
std::vector> m_ramMappings;
diff --git a/src/Cafe/HW/Espresso/Debugger/Debugger.cpp b/src/Cafe/HW/Espresso/Debugger/Debugger.cpp
index e84c9fda..37e374d6 100644
--- a/src/Cafe/HW/Espresso/Debugger/Debugger.cpp
+++ b/src/Cafe/HW/Espresso/Debugger/Debugger.cpp
@@ -8,7 +8,6 @@
#include "gui/debugger/DebuggerWindow2.h"
#include "Cafe/OS/libs/coreinit/coreinit.h"
-#include "util/helpers/helpers.h"
#if BOOST_OS_WINDOWS
#include
@@ -137,6 +136,11 @@ void debugger_createCodeBreakpoint(uint32 address, uint8 bpType)
debugger_updateExecutionBreakpoint(address);
}
+void debugger_createExecuteBreakpoint(uint32 address)
+{
+ debugger_createCodeBreakpoint(address, DEBUGGER_BP_T_NORMAL);
+}
+
namespace coreinit
{
std::vector& OSGetSchedulerThreads();
@@ -290,23 +294,8 @@ void debugger_toggleExecuteBreakpoint(uint32 address)
}
else
{
- // create new execution breakpoint
- debugger_createCodeBreakpoint(address, DEBUGGER_BP_T_NORMAL);
- }
-}
-
-void debugger_toggleLoggingBreakpoint(uint32 address)
-{
- auto existingBP = debugger_getFirstBP(address, DEBUGGER_BP_T_LOGGING);
- if (existingBP)
- {
- // delete existing breakpoint
- debugger_deleteBreakpoint(existingBP);
- }
- else
- {
- // create new logging breakpoint
- debugger_createCodeBreakpoint(address, DEBUGGER_BP_T_LOGGING);
+ // create new breakpoint
+ debugger_createExecuteBreakpoint(address);
}
}
@@ -549,48 +538,7 @@ void debugger_enterTW(PPCInterpreter_t* hCPU)
{
if (bp->bpType == DEBUGGER_BP_T_LOGGING && bp->enabled)
{
- std::string comment = !bp->comment.empty() ? boost::nowide::narrow(bp->comment) : fmt::format("Breakpoint at 0x{:08X} (no comment)", bp->address);
-
- auto replacePlaceholders = [&](const std::string& prefix, const auto& formatFunc)
- {
- size_t pos = 0;
- while ((pos = comment.find(prefix, pos)) != std::string::npos)
- {
- size_t endPos = comment.find('}', pos);
- if (endPos == std::string::npos)
- break;
-
- try
- {
- if (int regNum = ConvertString(comment.substr(pos + prefix.length(), endPos - pos - prefix.length())); regNum >= 0 && regNum < 32)
- {
- std::string replacement = formatFunc(regNum);
- comment.replace(pos, endPos - pos + 1, replacement);
- pos += replacement.length();
- }
- else
- {
- pos = endPos + 1;
- }
- }
- catch (...)
- {
- pos = endPos + 1;
- }
- }
- };
-
- // Replace integer register placeholders {rX}
- replacePlaceholders("{r", [&](int regNum) {
- return fmt::format("0x{:08X}", hCPU->gpr[regNum]);
- });
-
- // Replace floating point register placeholders {fX}
- replacePlaceholders("{f", [&](int regNum) {
- return fmt::format("{}", hCPU->fpr[regNum].fpr);
- });
-
- std::string logName = "Breakpoint '" + comment + "'";
+ std::string logName = !bp->comment.empty() ? "Breakpoint '"+boost::nowide::narrow(bp->comment)+"'" : fmt::format("Breakpoint at 0x{:08X} (no comment)", bp->address);
std::string logContext = fmt::format("Thread: {:08x} LR: 0x{:08x}", MEMPTR(coreinit::OSGetCurrentThread()).GetMPTR(), hCPU->spr.LR, cemuLog_advancedPPCLoggingEnabled() ? " Stack Trace:" : "");
cemuLog_log(LogType::Force, "[Debugger] {} was executed! {}", logName, logContext);
if (cemuLog_advancedPPCLoggingEnabled())
diff --git a/src/Cafe/HW/Espresso/Debugger/Debugger.h b/src/Cafe/HW/Espresso/Debugger/Debugger.h
index c220eb8a..249c47b8 100644
--- a/src/Cafe/HW/Espresso/Debugger/Debugger.h
+++ b/src/Cafe/HW/Espresso/Debugger/Debugger.h
@@ -100,8 +100,8 @@ extern debuggerState_t debuggerState;
// new API
DebuggerBreakpoint* debugger_getFirstBP(uint32 address);
void debugger_createCodeBreakpoint(uint32 address, uint8 bpType);
+void debugger_createExecuteBreakpoint(uint32 address);
void debugger_toggleExecuteBreakpoint(uint32 address); // create/remove execute breakpoint
-void debugger_toggleLoggingBreakpoint(uint32 address); // create/remove logging breakpoint
void debugger_toggleBreakpoint(uint32 address, bool state, DebuggerBreakpoint* bp);
void debugger_createMemoryBreakpoint(uint32 address, bool onRead, bool onWrite);
diff --git a/src/Cafe/HW/Espresso/EspressoISA.h b/src/Cafe/HW/Espresso/EspressoISA.h
index 5e09763b..b3ae45c3 100644
--- a/src/Cafe/HW/Espresso/EspressoISA.h
+++ b/src/Cafe/HW/Espresso/EspressoISA.h
@@ -10,18 +10,6 @@ namespace Espresso
CR_BIT_INDEX_SO = 3,
};
- enum class PSQ_LOAD_TYPE
- {
- TYPE_F32 = 0,
- TYPE_UNUSED1 = 1,
- TYPE_UNUSED2 = 2,
- TYPE_UNUSED3 = 3,
- TYPE_U8 = 4,
- TYPE_U16 = 5,
- TYPE_S8 = 6,
- TYPE_S16 = 7,
- };
-
enum class PrimaryOpcode
{
// underscore at the end of the name means that this instruction always updates CR0 (as if RC bit is set)
@@ -103,15 +91,13 @@ namespace Espresso
BCCTR = 528
};
- enum class Opcode31
+ enum class OPCODE_31
{
- TW = 4,
- MFTB = 371,
+
};
inline PrimaryOpcode GetPrimaryOpcode(uint32 opcode) { return (PrimaryOpcode)(opcode >> 26); };
inline Opcode19 GetGroup19Opcode(uint32 opcode) { return (Opcode19)((opcode >> 1) & 0x3FF); };
- inline Opcode31 GetGroup31Opcode(uint32 opcode) { return (Opcode31)((opcode >> 1) & 0x3FF); };
struct BOField
{
@@ -146,12 +132,6 @@ namespace Espresso
uint8 bo;
};
- // returns true if LK bit is set, only valid for branch instructions
- inline bool DecodeLK(uint32 opcode)
- {
- return (opcode & 1) != 0;
- }
-
inline void _decodeForm_I(uint32 opcode, uint32& LI, bool& AA, bool& LK)
{
LI = opcode & 0x3fffffc;
@@ -203,7 +183,13 @@ namespace Espresso
_decodeForm_D_branch(opcode, BD, BO, BI, AA, LK);
}
- inline void decodeOp_BCSPR(uint32 opcode, BOField& BO, uint32& BI, bool& LK) // BCLR and BCSPR
+ inline void decodeOp_BCLR(uint32 opcode, BOField& BO, uint32& BI, bool& LK)
+ {
+ // form XL (with BD field expected to be zero)
+ _decodeForm_XL(opcode, BO, BI, LK);
+ }
+
+ inline void decodeOp_BCCTR(uint32 opcode, BOField& BO, uint32& BI, bool& LK)
{
// form XL (with BD field expected to be zero)
_decodeForm_XL(opcode, BO, BI, LK);
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp
index 2fe07509..fe9316f0 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp
@@ -3,12 +3,12 @@ static void PPCInterpreter_setXerOV(PPCInterpreter_t* hCPU, bool hasOverflow)
{
if (hasOverflow)
{
- hCPU->xer_so = 1;
- hCPU->xer_ov = 1;
+ hCPU->spr.XER |= XER_SO;
+ hCPU->spr.XER |= XER_OV;
}
else
{
- hCPU->xer_ov = 0;
+ hCPU->spr.XER &= ~XER_OV;
}
}
@@ -41,7 +41,7 @@ static void PPCInterpreter_ADD(PPCInterpreter_t* hCPU, uint32 opcode)
static void PPCInterpreter_ADDO(PPCInterpreter_t* hCPU, uint32 opcode)
{
- // Don't Starve Giant Edition uses this instruction + BSO
+ // untested (Don't Starve Giant Edition uses this instruction + BSO)
PPC_OPC_TEMPL3_XO();
uint32 result = hCPU->gpr[rA] + hCPU->gpr[rB];
PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(hCPU->gpr[rA], hCPU->gpr[rB], result));
@@ -113,6 +113,7 @@ static void PPCInterpreter_ADDEO(PPCInterpreter_t* hCPU, uint32 opcode)
else
hCPU->xer_ca = 0;
PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(a, b, hCPU->gpr[rD]));
+ // update CR
if (opHasRC())
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU);
@@ -129,7 +130,7 @@ static void PPCInterpreter_ADDI(PPCInterpreter_t* hCPU, uint32 opcode)
static void PPCInterpreter_ADDIC(PPCInterpreter_t* hCPU, uint32 opcode)
{
- sint32 rD, rA;
+ int rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
uint32 a = hCPU->gpr[rA];
@@ -144,7 +145,7 @@ static void PPCInterpreter_ADDIC(PPCInterpreter_t* hCPU, uint32 opcode)
static void PPCInterpreter_ADDIC_(PPCInterpreter_t* hCPU, uint32 opcode)
{
- sint32 rD, rA;
+ int rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
uint32 a = hCPU->gpr[rA];
@@ -154,13 +155,14 @@ static void PPCInterpreter_ADDIC_(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->xer_ca = 1;
else
hCPU->xer_ca = 0;
+ // update cr0 flags
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU);
}
static void PPCInterpreter_ADDIS(PPCInterpreter_t* hCPU, uint32 opcode)
{
- sint32 rD, rA;
+ int rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_Shift16(opcode, rD, rA, imm);
hCPU->gpr[rD] = (rA ? hCPU->gpr[rA] : 0) + imm;
@@ -183,23 +185,6 @@ static void PPCInterpreter_ADDZE(PPCInterpreter_t* hCPU, uint32 opcode)
PPCInterpreter_nextInstruction(hCPU);
}
-static void PPCInterpreter_ADDZEO(PPCInterpreter_t* hCPU, uint32 opcode)
-{
- PPC_OPC_TEMPL3_XO();
- PPC_ASSERT(rB == 0);
- uint32 a = hCPU->gpr[rA];
- uint32 ca = hCPU->xer_ca;
- hCPU->gpr[rD] = a + ca;
- PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(a, 0, hCPU->gpr[rD]));
- if ((a == 0xffffffff) && ca)
- hCPU->xer_ca = 1;
- else
- hCPU->xer_ca = 0;
- if (opHasRC())
- ppc_update_cr0(hCPU, hCPU->gpr[rD]);
- PPCInterpreter_nextInstruction(hCPU);
-}
-
static void PPCInterpreter_ADDME(PPCInterpreter_t* hCPU, uint32 opcode)
{
PPC_OPC_TEMPL3_XO();
@@ -216,23 +201,6 @@ static void PPCInterpreter_ADDME(PPCInterpreter_t* hCPU, uint32 opcode)
PPCInterpreter_nextInstruction(hCPU);
}
-static void PPCInterpreter_ADDMEO(PPCInterpreter_t* hCPU, uint32 opcode)
-{
- PPC_OPC_TEMPL3_XO();
- PPC_ASSERT(rB == 0);
- uint32 a = hCPU->gpr[rA];
- uint32 ca = hCPU->xer_ca;
- hCPU->gpr[rD] = a + ca + 0xffffffff;
- PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(a, 0xffffffff, hCPU->gpr[rD]));
- if (a || ca)
- hCPU->xer_ca = 1;
- else
- hCPU->xer_ca = 0;
- if (opHasRC())
- ppc_update_cr0(hCPU, hCPU->gpr[rD]);
- PPCInterpreter_nextInstruction(hCPU);
-}
-
static void PPCInterpreter_SUBF(PPCInterpreter_t* hCPU, uint32 opcode)
{
PPC_OPC_TEMPL3_XO();
@@ -278,7 +246,7 @@ static void PPCInterpreter_SUBFCO(PPCInterpreter_t* hCPU, uint32 opcode)
uint32 a = hCPU->gpr[rA];
uint32 b = hCPU->gpr[rB];
hCPU->gpr[rD] = ~a + b + 1;
- // update carry
+ // update xer
if (ppc_carry_3(~a, b, 1))
hCPU->xer_ca = 1;
else
@@ -292,7 +260,7 @@ static void PPCInterpreter_SUBFCO(PPCInterpreter_t* hCPU, uint32 opcode)
static void PPCInterpreter_SUBFIC(PPCInterpreter_t* hCPU, uint32 opcode)
{
- sint32 rD, rA;
+ int rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
uint32 a = hCPU->gpr[rA];
@@ -316,6 +284,7 @@ static void PPCInterpreter_SUBFE(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->xer_ca = 1;
else
hCPU->xer_ca = 0;
+ // update cr0
if (opHasRC())
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU);
@@ -335,6 +304,7 @@ static void PPCInterpreter_SUBFEO(PPCInterpreter_t* hCPU, uint32 opcode)
else
hCPU->xer_ca = 0;
PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~a, b, result));
+ // update cr0
if (opHasRC())
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU);
@@ -356,25 +326,9 @@ static void PPCInterpreter_SUBFZE(PPCInterpreter_t* hCPU, uint32 opcode)
PPCInterpreter_nextInstruction(hCPU);
}
-static void PPCInterpreter_SUBFZEO(PPCInterpreter_t* hCPU, uint32 opcode)
-{
- PPC_OPC_TEMPL3_XO();
- PPC_ASSERT(rB == 0);
- uint32 a = hCPU->gpr[rA];
- uint32 ca = hCPU->xer_ca;
- hCPU->gpr[rD] = ~a + ca;
- PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~a, 0, hCPU->gpr[rD]));
- if (a == 0 && ca)
- hCPU->xer_ca = 1;
- else
- hCPU->xer_ca = 0;
- if (opHasRC())
- ppc_update_cr0(hCPU, hCPU->gpr[rD]);
- PPCInterpreter_nextInstruction(hCPU);
-}
-
static void PPCInterpreter_SUBFME(PPCInterpreter_t* hCPU, uint32 opcode)
{
+ // untested
PPC_OPC_TEMPL3_XO();
PPC_ASSERT(rB == 0);
uint32 a = hCPU->gpr[rA];
@@ -385,24 +339,7 @@ static void PPCInterpreter_SUBFME(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->xer_ca = 1;
else
hCPU->xer_ca = 0;
- if (opcode & PPC_OPC_RC)
- ppc_update_cr0(hCPU, hCPU->gpr[rD]);
- PPCInterpreter_nextInstruction(hCPU);
-}
-
-static void PPCInterpreter_SUBFMEO(PPCInterpreter_t* hCPU, uint32 opcode)
-{
- PPC_OPC_TEMPL3_XO();
- PPC_ASSERT(rB == 0);
- uint32 a = hCPU->gpr[rA];
- uint32 ca = hCPU->xer_ca;
- hCPU->gpr[rD] = ~a + 0xFFFFFFFF + ca;
- PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~a, 0xFFFFFFFF, hCPU->gpr[rD]));
- // update xer carry
- if (ppc_carry_3(~a, 0xFFFFFFFF, ca))
- hCPU->xer_ca = 1;
- else
- hCPU->xer_ca = 0;
+ // update cr0
if (opcode & PPC_OPC_RC)
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU);
@@ -415,8 +352,13 @@ static void PPCInterpreter_MULHW_(PPCInterpreter_t* hCPU, uint32 opcode)
sint64 b = (sint32)hCPU->gpr[rB];
sint64 c = a * b;
hCPU->gpr[rD] = ((uint64)c) >> 32;
- if (opHasRC())
+ if (opcode & PPC_OPC_RC) {
+ // update cr0 flags
+#ifdef CEMU_DEBUG_ASSERT
+ assert_dbg();
+#endif
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
+ }
PPCInterpreter_nextInstruction(hCPU);
}
@@ -467,14 +409,14 @@ static void PPCInterpreter_MULLI(PPCInterpreter_t* hCPU, uint32 opcode)
static void PPCInterpreter_DIVW(PPCInterpreter_t* hCPU, uint32 opcode)
{
PPC_OPC_TEMPL3_XO();
- sint32 a = (sint32)hCPU->gpr[rA];
- sint32 b = (sint32)hCPU->gpr[rB];
+ sint32 a = hCPU->gpr[rA];
+ sint32 b = hCPU->gpr[rB];
if (b == 0)
- hCPU->gpr[rD] = a < 0 ? 0xFFFFFFFF : 0;
- else if (a == 0x80000000 && b == 0xFFFFFFFF)
- hCPU->gpr[rD] = 0xFFFFFFFF;
- else
- hCPU->gpr[rD] = a / b;
+ {
+ cemuLog_logDebug(LogType::Force, "Error: Division by zero! [{:08x}]", (uint32)hCPU->instructionPointer);
+ b++;
+ }
+ hCPU->gpr[rD] = a / b;
if (opHasRC())
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU);
@@ -483,23 +425,16 @@ static void PPCInterpreter_DIVW(PPCInterpreter_t* hCPU, uint32 opcode)
static void PPCInterpreter_DIVWO(PPCInterpreter_t* hCPU, uint32 opcode)
{
PPC_OPC_TEMPL3_XO();
- sint32 a = (sint32)hCPU->gpr[rA];
- sint32 b = (sint32)hCPU->gpr[rB];
+ sint32 a = hCPU->gpr[rA];
+ sint32 b = hCPU->gpr[rB];
if (b == 0)
{
PPCInterpreter_setXerOV(hCPU, true);
- hCPU->gpr[rD] = a < 0 ? 0xFFFFFFFF : 0;
- }
- else if(a == 0x80000000 && b == 0xFFFFFFFF)
- {
- PPCInterpreter_setXerOV(hCPU, true);
- hCPU->gpr[rD] = 0xFFFFFFFF;
- }
- else
- {
- hCPU->gpr[rD] = a / b;
- PPCInterpreter_setXerOV(hCPU, false);
+ PPCInterpreter_nextInstruction(hCPU);
+ return;
}
+ hCPU->gpr[rD] = a / b;
+ PPCInterpreter_setXerOV(hCPU, false);
if (opHasRC())
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU);
@@ -508,14 +443,12 @@ static void PPCInterpreter_DIVWO(PPCInterpreter_t* hCPU, uint32 opcode)
static void PPCInterpreter_DIVWU(PPCInterpreter_t* hCPU, uint32 opcode)
{
PPC_OPC_TEMPL3_XO();
- uint32 a = hCPU->gpr[rA];
- uint32 b = hCPU->gpr[rB];
- if (b == 0)
- hCPU->gpr[rD] = 0;
- else if (a == 0x80000000 && b == 0xFFFFFFFF)
- hCPU->gpr[rD] = 0;
- else
- hCPU->gpr[rD] = a / b;
+ if (hCPU->gpr[rB] == 0)
+ {
+ PPCInterpreter_nextInstruction(hCPU);
+ return;
+ }
+ hCPU->gpr[rD] = hCPU->gpr[rA] / hCPU->gpr[rB];
if (opHasRC())
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU);
@@ -524,23 +457,14 @@ static void PPCInterpreter_DIVWU(PPCInterpreter_t* hCPU, uint32 opcode)
static void PPCInterpreter_DIVWUO(PPCInterpreter_t* hCPU, uint32 opcode)
{
PPC_OPC_TEMPL3_XO();
- uint32 a = hCPU->gpr[rA];
- uint32 b = hCPU->gpr[rB];
- if (b == 0)
+ if (hCPU->gpr[rB] == 0)
{
PPCInterpreter_setXerOV(hCPU, true);
- hCPU->gpr[rD] = 0;
- }
- else if(a == 0x80000000 && b == 0xFFFFFFFF)
- {
- PPCInterpreter_setXerOV(hCPU, false);
- hCPU->gpr[rD] = 0;
- }
- else
- {
- hCPU->gpr[rD] = a / b;
- PPCInterpreter_setXerOV(hCPU, false);
+ PPCInterpreter_nextInstruction(hCPU);
+ return;
}
+ hCPU->gpr[rD] = hCPU->gpr[rA] / hCPU->gpr[rB];
+ PPCInterpreter_setXerOV(hCPU, false);
if (opHasRC())
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU);
@@ -567,13 +491,6 @@ static void PPCInterpreter_CRANDC(PPCInterpreter_t* hCPU, uint32 opcode)
PPCInterpreter_nextInstruction(hCPU);
}
-static void PPCInterpreter_CRNAND(PPCInterpreter_t* hCPU, uint32 opcode)
-{
- PPC_OPC_TEMPL_X_CR();
- ppc_setCRBit(hCPU, crD, (ppc_getCRBit(hCPU, crA)&ppc_getCRBit(hCPU, crB)) ^ 1);
- PPCInterpreter_nextInstruction(hCPU);
-}
-
static void PPCInterpreter_CROR(PPCInterpreter_t* hCPU, uint32 opcode)
{
PPC_OPC_TEMPL_X_CR();
@@ -931,7 +848,8 @@ static void PPCInterpreter_CMP(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
else
hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
- hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
+ if ((hCPU->spr.XER & XER_SO) != 0)
+ hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
PPCInterpreter_nextInstruction(hCPU);
}
@@ -953,7 +871,8 @@ static void PPCInterpreter_CMPL(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
else
hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
- hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
+ if ((hCPU->spr.XER & XER_SO) != 0)
+ hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
PPCInterpreter_nextInstruction(hCPU);
}
@@ -976,7 +895,8 @@ static void PPCInterpreter_CMPI(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
else
hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
- hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
+ if (hCPU->spr.XER & XER_SO)
+ hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
PPCInterpreter_nextInstruction(hCPU);
}
@@ -999,7 +919,8 @@ static void PPCInterpreter_CMPLI(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
else
hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
- hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
+ if (hCPU->spr.XER & XER_SO)
+ hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
PPCInterpreter_nextInstruction(hCPU);
}
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp
index 2c99b84c..aed571d7 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp
@@ -32,7 +32,7 @@ espresso_frsqrte_entry_t frsqrteLookupTable[32] =
{0x20c1000, 0x35e},{0x1f12000, 0x332},{0x1d79000, 0x30a},{0x1bf4000, 0x2e6},
};
-ATTR_MS_ABI double frsqrte_espresso(double input)
+double frsqrte_espresso(double input)
{
unsigned long long x = *(unsigned long long*)&input;
@@ -111,7 +111,7 @@ espresso_fres_entry_t fresLookupTable[32] =
{0x88400, 0x11a}, {0x65000, 0x11a}, {0x41c00, 0x108}, {0x20c00, 0x106}
};
-ATTR_MS_ABI double fres_espresso(double input)
+double fres_espresso(double input)
{
// based on testing we know that fres uses only the first 15 bits of the mantissa
// seee eeee eeee mmmm mmmm mmmm mmmx xxxx .... (s = sign, e = exponent, m = mantissa, x = not used)
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterHLE.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterHLE.cpp
index cf7ba195..24219e66 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterHLE.cpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterHLE.cpp
@@ -2,70 +2,62 @@
#include "PPCInterpreterInternal.h"
#include "PPCInterpreterHelper.h"
-std::unordered_set s_unsupportedHLECalls;
+std::unordered_set sUnsupportedHLECalls;
void PPCInterpreter_handleUnsupportedHLECall(PPCInterpreter_t* hCPU)
{
const char* libFuncName = (char*)memory_getPointerFromVirtualOffset(hCPU->instructionPointer + 8);
std::string tempString = fmt::format("Unsupported lib call: {}", libFuncName);
- if (s_unsupportedHLECalls.find(tempString) == s_unsupportedHLECalls.end())
+ if (sUnsupportedHLECalls.find(tempString) == sUnsupportedHLECalls.end())
{
cemuLog_log(LogType::UnsupportedAPI, "{}", tempString);
- s_unsupportedHLECalls.emplace(tempString);
+ sUnsupportedHLECalls.emplace(tempString);
}
hCPU->gpr[3] = 0;
PPCInterpreter_nextInstruction(hCPU);
}
-static constexpr size_t HLE_TABLE_CAPACITY = 0x4000;
-HLECALL s_ppcHleTable[HLE_TABLE_CAPACITY]{};
-sint32 s_ppcHleTableWriteIndex = 0;
-std::mutex s_ppcHleTableMutex;
+std::vector* sPPCHLETable{};
HLEIDX PPCInterpreter_registerHLECall(HLECALL hleCall, std::string hleName)
{
- std::unique_lock _l(s_ppcHleTableMutex);
- if (s_ppcHleTableWriteIndex >= HLE_TABLE_CAPACITY)
+ if (!sPPCHLETable)
+ sPPCHLETable = new std::vector();
+ for (sint32 i = 0; i < sPPCHLETable->size(); i++)
{
- cemuLog_log(LogType::Force, "HLE table is full");
- cemu_assert(false);
- }
- for (sint32 i = 0; i < s_ppcHleTableWriteIndex; i++)
- {
- if (s_ppcHleTable[i] == hleCall)
- {
+ if ((*sPPCHLETable)[i] == hleCall)
return i;
- }
}
- cemu_assert(s_ppcHleTableWriteIndex < HLE_TABLE_CAPACITY);
- s_ppcHleTable[s_ppcHleTableWriteIndex] = hleCall;
- HLEIDX funcIndex = s_ppcHleTableWriteIndex;
- s_ppcHleTableWriteIndex++;
- return funcIndex;
+ HLEIDX newFuncIndex = (sint32)sPPCHLETable->size();
+ sPPCHLETable->resize(sPPCHLETable->size() + 1);
+ (*sPPCHLETable)[newFuncIndex] = hleCall;
+ return newFuncIndex;
}
HLECALL PPCInterpreter_getHLECall(HLEIDX funcIndex)
{
- if (funcIndex < 0 || funcIndex >= HLE_TABLE_CAPACITY)
+ if (funcIndex < 0 || funcIndex >= sPPCHLETable->size())
return nullptr;
- return s_ppcHleTable[funcIndex];
+ return sPPCHLETable->data()[funcIndex];
}
-std::mutex s_hleLogMutex;
+std::mutex g_hleLogMutex;
void PPCInterpreter_virtualHLE(PPCInterpreter_t* hCPU, unsigned int opcode)
{
uint32 hleFuncId = opcode & 0xFFFF;
- if (hleFuncId == 0xFFD0) [[unlikely]]
+ if (hleFuncId == 0xFFD0)
{
- s_hleLogMutex.lock();
+ g_hleLogMutex.lock();
PPCInterpreter_handleUnsupportedHLECall(hCPU);
- s_hleLogMutex.unlock();
+ g_hleLogMutex.unlock();
+ return;
}
else
{
// os lib function
- auto hleCall = PPCInterpreter_getHLECall(hleFuncId);
+ cemu_assert(hleFuncId < sPPCHLETable->size());
+ auto hleCall = (*sPPCHLETable)[hleFuncId];
cemu_assert(hleCall);
hleCall(hCPU);
}
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterImpl.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterImpl.cpp
index 547472ab..cacfa4a9 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterImpl.cpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterImpl.cpp
@@ -428,6 +428,9 @@ public:
}
};
+uint32 testIP[100];
+uint32 testIPC = 0;
+
template
class PPCInterpreterContainer
{
@@ -463,10 +466,6 @@ public:
case 1: // virtual HLE
PPCInterpreter_virtualHLE(hCPU, opcode);
break;
- case 3:
- cemuLog_logDebug(LogType::Force, "Unsupported TWI instruction executed at {:08x}", hCPU->instructionPointer);
- PPCInterpreter_nextInstruction(hCPU);
- break;
case 4:
switch (PPC_getBits(opcode, 30, 5))
{
@@ -483,9 +482,8 @@ public:
PPCInterpreter_PS_CMPU1(hCPU, opcode);
break;
default:
- cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [4->0] at {:08x}", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer);
+ debug_printf("Unknown execute %04X as [4->0] at %08X\n", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer);
cemu_assert_unimplemented();
- hCPU->instructionPointer += 4;
break;
}
break;
@@ -511,9 +509,8 @@ public:
PPCInterpreter_PS_ABS(hCPU, opcode);
break;
default:
- cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [4->8] at {:08x}", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer);
+ debug_printf("Unknown execute %04X as [4->8] at %08X\n", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer);
cemu_assert_unimplemented();
- hCPU->instructionPointer += 4;
break;
}
break;
@@ -551,9 +548,8 @@ public:
PPCInterpreter_PS_MERGE11(hCPU, opcode);
break;
default:
- cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [4->16] at {:08x}", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer);
- cemu_assert_unimplemented();
- hCPU->instructionPointer += 4;
+ debug_printf("Unknown execute %04X as [4->16] at %08X\n", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer);
+ debugBreakpoint();
break;
}
break;
@@ -594,9 +590,8 @@ public:
PPCInterpreter_PS_NMADD(hCPU, opcode);
break;
default:
- cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [4] at {:08x}", PPC_getBits(opcode, 30, 5), hCPU->instructionPointer);
+ debug_printf("Unknown execute %04X as [4] at %08X\n", PPC_getBits(opcode, 30, 5), hCPU->instructionPointer);
cemu_assert_unimplemented();
- hCPU->instructionPointer += 4;
break;
}
break;
@@ -628,15 +623,12 @@ public:
PPCInterpreter_BCX(hCPU, opcode);
break;
case 17:
- if (PPC_getBits(opcode, 30, 1) == 1)
- {
+ if (PPC_getBits(opcode, 30, 1) == 1) {
PPCInterpreter_SC(hCPU, opcode);
}
- else
- {
- cemuLog_logDebug(LogType::Force, "Unsupported Opcode [0x17 --> 0x0]");
+ else {
+ debug_printf("Unsupported Opcode [0x17 --> 0x0]\n");
cemu_assert_unimplemented();
- hCPU->instructionPointer += 4;
}
break;
case 18:
@@ -666,9 +658,6 @@ public:
case 193:
PPCInterpreter_CRXOR(hCPU, opcode);
break;
- case 225:
- PPCInterpreter_CRNAND(hCPU, opcode);
- break;
case 257:
PPCInterpreter_CRAND(hCPU, opcode);
break;
@@ -685,9 +674,8 @@ public:
PPCInterpreter_BCCTR(hCPU, opcode);
break;
default:
- cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [19] at {:08x}\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
+ debug_printf("Unknown execute %04X as [19] at %08X\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
cemu_assert_unimplemented();
- hCPU->instructionPointer += 4;
break;
}
break;
@@ -725,6 +713,9 @@ public:
PPCInterpreter_CMP(hCPU, opcode);
break;
case 4:
+ #ifdef CEMU_DEBUG_ASSERT
+ debug_printf("TW instruction executed at %08x\n", hCPU->instructionPointer);
+ #endif
PPCInterpreter_TW(hCPU, opcode);
break;
case 8:
@@ -904,12 +895,6 @@ public:
case 522:
PPCInterpreter_ADDCO(hCPU, opcode);
break;
- case 523: // 11 | OE
- PPCInterpreter_MULHWU_(hCPU, opcode); // OE is ignored
- break;
- case 533:
- PPCInterpreter_LSWX(hCPU, opcode);
- break;
case 534:
PPCInterpreter_LWBRX(hCPU, opcode);
break;
@@ -928,9 +913,6 @@ public:
case 567:
PPCInterpreter_LFSUX(hCPU, opcode);
break;
- case 587: // 75 | OE
- PPCInterpreter_MULHW_(hCPU, opcode); // OE is ignored for MULHW
- break;
case 595:
PPCInterpreter_MFSR(hCPU, opcode);
break;
@@ -961,30 +943,15 @@ public:
case 663:
PPCInterpreter_STFSX(hCPU, opcode);
break;
- case 661:
- PPCInterpreter_STSWX(hCPU, opcode);
- break;
case 695:
PPCInterpreter_STFSUX(hCPU, opcode);
break;
- case 712: // 200 | OE
- PPCInterpreter_SUBFZEO(hCPU, opcode);
- break;
- case 714: // 202 | OE
- PPCInterpreter_ADDZEO(hCPU, opcode);
- break;
case 725:
PPCInterpreter_STSWI(hCPU, opcode);
break;
case 727:
PPCInterpreter_STFDX(hCPU, opcode);
break;
- case 744: // 232 | OE
- PPCInterpreter_SUBFMEO(hCPU, opcode);
- break;
- case 746: // 234 | OE
- PPCInterpreter_ADDMEO(hCPU, opcode);
- break;
case 747:
PPCInterpreter_MULLWO(hCPU, opcode);
break;
@@ -1031,8 +998,10 @@ public:
PPCInterpreter_DCBZ(hCPU, opcode);
break;
default:
- cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [31] at {:08x}\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
- cemu_assert_unimplemented();
+ debug_printf("Unknown execute %04X as [31] at %08X\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
+ #ifdef CEMU_DEBUG_ASSERT
+ assert_dbg();
+ #endif
hCPU->instructionPointer += 4;
break;
}
@@ -1115,7 +1084,7 @@ public:
case 57:
PPCInterpreter_PSQ_LU(hCPU, opcode);
break;
- case 59: // opcode category
+ case 59: //Opcode category
switch (PPC_getBits(opcode, 30, 5))
{
case 18:
@@ -1146,9 +1115,8 @@ public:
PPCInterpreter_FNMADDS(hCPU, opcode);
break;
default:
- cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [59] at {:08x}\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
+ debug_printf("Unknown execute %04X as [59] at %08X\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
cemu_assert_unimplemented();
- hCPU->instructionPointer += 4;
break;
}
break;
@@ -1227,19 +1195,18 @@ public:
case 583:
PPCInterpreter_MFFS(hCPU, opcode);
break;
- case 711:
+ case 711: // IBM documentation has this wrong as 771?
PPCInterpreter_MTFSF(hCPU, opcode);
break;
default:
- cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [63] at {:08x}\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
+ debug_printf("Unknown execute %04X as [63] at %08X\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
cemu_assert_unimplemented();
- PPCInterpreter_nextInstruction(hCPU);
break;
}
}
break;
default:
- cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} at {:08x}\n", PPC_getBits(opcode, 5, 6), (unsigned int)hCPU->instructionPointer);
+ debug_printf("Unknown execute %04X at %08X\n", PPC_getBits(opcode, 5, 6), (unsigned int)hCPU->instructionPointer);
cemu_assert_unimplemented();
}
}
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h
index 896fd21c..bc8458d9 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h
@@ -50,9 +50,9 @@
#define CR_BIT_EQ 2
#define CR_BIT_SO 3
+#define XER_SO (1<<31) // summary overflow bit
+#define XER_OV (1<<30) // overflow bit
#define XER_BIT_CA (29) // carry bit index. To accelerate frequent access, this bit is stored as a separate uint8
-#define XER_BIT_SO (31) // summary overflow, counterpart to CR SO
-#define XER_BIT_OV (30)
// FPSCR
#define FPSCR_VXSNAN (1<<24)
@@ -118,8 +118,7 @@
static inline void ppc_update_cr0(PPCInterpreter_t* hCPU, uint32 r)
{
- cemu_assert_debug(hCPU->xer_so <= 1);
- hCPU->cr[CR_BIT_SO] = hCPU->xer_so;
+ hCPU->cr[CR_BIT_SO] = (hCPU->spr.XER&XER_SO) ? 1 : 0;
hCPU->cr[CR_BIT_LT] = ((r != 0) ? 1 : 0) & ((r & 0x80000000) ? 1 : 0);
hCPU->cr[CR_BIT_EQ] = (r == 0);
hCPU->cr[CR_BIT_GT] = hCPU->cr[CR_BIT_EQ] ^ hCPU->cr[CR_BIT_LT] ^ 1; // this works because EQ and LT can never be set at the same time. So the only case where GT becomes 1 is when LT=0 and EQ=0
@@ -191,8 +190,8 @@ inline double roundTo25BitAccuracy(double d)
return *(double*)&v;
}
-ATTR_MS_ABI double fres_espresso(double input);
-ATTR_MS_ABI double frsqrte_espresso(double input);
+double fres_espresso(double input);
+double frsqrte_espresso(double input);
void fcmpu_espresso(PPCInterpreter_t* hCPU, int crfD, double a, double b);
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp
index ea7bb038..694e05e6 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp
@@ -31,7 +31,7 @@ static void PPCInterpreter_STW(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STWU(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- sint32 rA, rS;
+ int rA, rS;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm);
ppcItpCtrl::ppcMem_writeDataU32(hCPU, hCPU->gpr[rA] + imm, hCPU->gpr[rS]);
@@ -42,7 +42,7 @@ static void PPCInterpreter_STWU(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STWX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- sint32 rA, rS, rB;
+ int rA, rS, rB;
PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
ppcItpCtrl::ppcMem_writeDataU32(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], hCPU->gpr[rS]);
PPCInterpreter_nextInstruction(hCPU);
@@ -85,8 +85,7 @@ static void PPCInterpreter_STWCX(PPCInterpreter_t* hCPU, uint32 Opcode)
ppc_setCRBit(hCPU, CR_BIT_GT, 0);
ppc_setCRBit(hCPU, CR_BIT_EQ, 1);
}
- cemu_assert_debug(hCPU->xer_so <= 1);
- ppc_setCRBit(hCPU, CR_BIT_SO, hCPU->xer_so);
+ ppc_setCRBit(hCPU, CR_BIT_SO, (hCPU->spr.XER&XER_SO) != 0 ? 1 : 0);
// remove reservation
hCPU->reservedMemAddr = 0;
hCPU->reservedMemValue = 0;
@@ -103,7 +102,7 @@ static void PPCInterpreter_STWCX(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STWUX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- sint32 rA, rS, rB;
+ int rA, rS, rB;
PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
ppcItpCtrl::ppcMem_writeDataU32(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], hCPU->gpr[rS]);
if (rA)
@@ -113,7 +112,7 @@ static void PPCInterpreter_STWUX(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STWBRX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- sint32 rA, rS, rB;
+ int rA, rS, rB;
PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
ppcItpCtrl::ppcMem_writeDataU32(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], _swapEndianU32(hCPU->gpr[rS]));
PPCInterpreter_nextInstruction(hCPU);
@@ -121,7 +120,7 @@ static void PPCInterpreter_STWBRX(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STMW(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- sint32 rS, rA;
+ int rS, rA;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm);
uint32 ea = (rA ? hCPU->gpr[rA] : 0) + imm;
@@ -136,7 +135,7 @@ static void PPCInterpreter_STMW(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STH(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- sint32 rA, rS;
+ int rA, rS;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm);
ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? hCPU->gpr[rA] : 0) + imm, (uint16)hCPU->gpr[rS]);
@@ -145,7 +144,7 @@ static void PPCInterpreter_STH(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STHU(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- sint32 rA, rS;
+ int rA, rS;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm);
ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? hCPU->gpr[rA] : 0) + imm, (uint16)hCPU->gpr[rS]);
@@ -156,7 +155,7 @@ static void PPCInterpreter_STHU(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STHX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- sint32 rA, rS, rB;
+ int rA, rS, rB;
PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], (uint16)hCPU->gpr[rS]);
PPCInterpreter_nextInstruction(hCPU);
@@ -164,7 +163,7 @@ static void PPCInterpreter_STHX(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STHUX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- sint32 rA, rS, rB;
+ int rA, rS, rB;
PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], (uint16)hCPU->gpr[rS]);
if (rA)
@@ -174,7 +173,7 @@ static void PPCInterpreter_STHUX(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STHBRX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- sint32 rA, rS, rB;
+ int rA, rS, rB;
PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], _swapEndianU16((uint16)hCPU->gpr[rS]));
PPCInterpreter_nextInstruction(hCPU);
@@ -182,7 +181,7 @@ static void PPCInterpreter_STHBRX(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STB(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- sint32 rA, rS;
+ int rA, rS;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm);
ppcItpCtrl::ppcMem_writeDataU8(hCPU, (rA ? hCPU->gpr[rA] : 0) + imm, (uint8)hCPU->gpr[rS]);
@@ -191,7 +190,7 @@ static void PPCInterpreter_STB(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STBU(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- sint32 rA, rS;
+ int rA, rS;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm);
ppcItpCtrl::ppcMem_writeDataU8(hCPU, hCPU->gpr[rA] + imm, (uint8)hCPU->gpr[rS]);
@@ -201,7 +200,7 @@ static void PPCInterpreter_STBU(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STBX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- sint32 rA, rS, rB;
+ int rA, rS, rB;
PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
ppcItpCtrl::ppcMem_writeDataU8(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], (uint8)hCPU->gpr[rS]);
PPCInterpreter_nextInstruction(hCPU);
@@ -209,7 +208,7 @@ static void PPCInterpreter_STBX(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STBUX(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- sint32 rA, rS, rB;
+ int rA, rS, rB;
PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
ppcItpCtrl::ppcMem_writeDataU8(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], (uint8)hCPU->gpr[rS]);
if (rA)
@@ -219,7 +218,7 @@ static void PPCInterpreter_STBUX(PPCInterpreter_t* hCPU, uint32 Opcode)
static void PPCInterpreter_STSWI(PPCInterpreter_t* hCPU, uint32 Opcode)
{
- sint32 rA, rS, nb;
+ int rA, rS, nb;
PPC_OPC_TEMPL_X(Opcode, rS, rA, nb);
if (nb == 0) nb = 32;
uint32 ea = rA ? hCPU->gpr[rA] : 0;
@@ -229,39 +228,7 @@ static void PPCInterpreter_STSWI(PPCInterpreter_t* hCPU, uint32 Opcode)
{
if (i == 0)
{
- r = rS < 32 ? hCPU->gpr[rS] : 0; // what happens if rS is out of bounds?
- rS++;
- rS %= 32;
- i = 4;
- }
- ppcItpCtrl::ppcMem_writeDataU8(hCPU, ea, (r >> 24));
- r <<= 8;
- ea++;
- i--;
- nb--;
- }
- PPCInterpreter_nextInstruction(hCPU);
-}
-
-static void PPCInterpreter_STSWX(PPCInterpreter_t* hCPU, uint32 Opcode)
-{
- sint32 rA, rS, rB;
- PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
- sint32 nb = hCPU->spr.XER&0x7F;
- if (nb == 0)
- {
- PPCInterpreter_nextInstruction(hCPU);
- return;
- }
- uint32 ea = rA ? hCPU->gpr[rA] : 0;
- ea += hCPU->gpr[rB];
- uint32 r = 0;
- int i = 0;
- while (nb > 0)
- {
- if (i == 0)
- {
- r = rS < 32 ? hCPU->gpr[rS] : 0; // what happens if rS is out of bounds?
+ r = hCPU->gpr[rS];
rS++;
rS %= 32;
i = 4;
@@ -492,51 +459,8 @@ static void PPCInterpreter_LSWI(PPCInterpreter_t* hCPU, uint32 Opcode)
PPC_OPC_TEMPL_X(Opcode, rD, rA, nb);
if (nb == 0)
nb = 32;
- uint32 ea = rA ? hCPU->gpr[rA] : 0;
- uint32 r = 0;
- int i = 4;
- uint8 v;
- while (nb>0)
- {
- if (i == 0)
- {
- i = 4;
- if(rD < 32)
- hCPU->gpr[rD] = r;
- rD++;
- rD %= 32;
- r = 0;
- }
- v = ppcItpCtrl::ppcMem_readDataU8(hCPU, ea);
- r <<= 8;
- r |= v;
- ea++;
- i--;
- nb--;
- }
- while (i)
- {
- r <<= 8;
- i--;
- }
- if(rD < 32)
- hCPU->gpr[rD] = r;
- PPCInterpreter_nextInstruction(hCPU);
-}
-static void PPCInterpreter_LSWX(PPCInterpreter_t* hCPU, uint32 Opcode)
-{
- sint32 rA, rD, rB;
- PPC_OPC_TEMPL_X(Opcode, rD, rA, rB);
- // byte count comes from XER
- uint32 nb = (hCPU->spr.XER>>0)&0x7F;
- if (nb == 0)
- {
- PPCInterpreter_nextInstruction(hCPU);
- return; // no-op
- }
uint32 ea = rA ? hCPU->gpr[rA] : 0;
- ea += hCPU->gpr[rB];
uint32 r = 0;
int i = 4;
uint8 v;
@@ -545,8 +469,7 @@ static void PPCInterpreter_LSWX(PPCInterpreter_t* hCPU, uint32 Opcode)
if (i == 0)
{
i = 4;
- if(rD < 32)
- hCPU->gpr[rD] = r;
+ hCPU->gpr[rD] = r;
rD++;
rD %= 32;
r = 0;
@@ -563,8 +486,7 @@ static void PPCInterpreter_LSWX(PPCInterpreter_t* hCPU, uint32 Opcode)
r <<= 8;
i--;
}
- if(rD < 32)
- hCPU->gpr[rD] = r;
+ hCPU->gpr[rD] = r;
PPCInterpreter_nextInstruction(hCPU);
}
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp
index 4449f135..ace1601f 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp
@@ -63,25 +63,16 @@ void PPCInterpreter_setDEC(PPCInterpreter_t* hCPU, uint32 newValue)
uint32 PPCInterpreter_getXER(PPCInterpreter_t* hCPU)
{
uint32 xerValue = hCPU->spr.XER;
- xerValue &= ~(1 << XER_BIT_CA);
- xerValue &= ~(1 << XER_BIT_SO);
- xerValue &= ~(1 << XER_BIT_OV);
- if (hCPU->xer_ca)
- xerValue |= (1 << XER_BIT_CA);
- if (hCPU->xer_so)
- xerValue |= (1 << XER_BIT_SO);
- if (hCPU->xer_ov)
- xerValue |= (1 << XER_BIT_OV);
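+ xerValue &= ~(1<<XER_BIT_CA);
+ if( hCPU->xer_ca )
+ xerValue |= (1<<XER_BIT_CA);
return xerValue;
}
void PPCInterpreter_setXER(PPCInterpreter_t* hCPU, uint32 v)
{
- const uint32 XER_MASK = 0xE0FFFFFF; // some bits are masked out. Figure out which ones exactly
- hCPU->spr.XER = v & XER_MASK;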
- hCPU->xer_ca = (v >> XER_BIT_CA) & 1;
- hCPU->xer_so = (v >> XER_BIT_SO) & 1;
- hCPU->xer_ov = (v >> XER_BIT_OV) & 1;
+ hCPU->spr.XER = v;
+ hCPU->xer_ca = (v>>XER_BIT_CA)&1;
}
uint32 PPCInterpreter_getCoreIndex(PPCInterpreter_t* hCPU)
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp
index 7809a01d..12f86427 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp
@@ -5,6 +5,7 @@
#include "Cafe/OS/libs/coreinit/coreinit_CodeGen.h"
#include "../Recompiler/PPCRecompiler.h"
+#include "../Recompiler/PPCRecompilerX64.h"
#include
#include "Cafe/HW/Latte/Core/LatteBufferCache.h"
@@ -93,6 +94,7 @@ void PPCInterpreter_MTCRF(PPCInterpreter_t* hCPU, uint32 Opcode)
{
// frequently used by GCC compiled code (e.g. SM64 port)
// tested
+
uint32 rS;
uint32 crfMask;
PPC_OPC_TEMPL_XFX(Opcode, rS, crfMask);
diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.hpp
index 9bfcd53d..718162be 100644
--- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.hpp
+++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.hpp
@@ -68,8 +68,6 @@ static void PPCInterpreter_TW(PPCInterpreter_t* hCPU, uint32 opcode)
PPC_OPC_TEMPL_X(opcode, to, rA, rB);
cemu_assert_debug(to == 0);
- if(to != 0)
- PPCInterpreter_nextInstruction(hCPU);
if (rA == DEBUGGER_BP_T_DEBUGGER)
debugger_enterTW(hCPU);
diff --git a/src/Cafe/HW/Espresso/PPCState.h b/src/Cafe/HW/Espresso/PPCState.h
index fd943d39..c315ed0e 100644
--- a/src/Cafe/HW/Espresso/PPCState.h
+++ b/src/Cafe/HW/Espresso/PPCState.h
@@ -49,12 +49,12 @@ struct PPCInterpreter_t
uint32 fpscr;
uint8 cr[32]; // 0 -> bit not set, 1 -> bit set (upper 7 bits of each byte must always be zero) (cr0 starts at index 0, cr1 at index 4 ..)
uint8 xer_ca; // carry from xer
- uint8 xer_so;
- uint8 xer_ov;
+ uint8 LSQE;
+ uint8 PSE;
// thread remaining cycles
sint32 remainingCycles; // if this value goes below zero, the next thread is scheduled
sint32 skippedCycles; // number of skipped cycles
- struct
+ struct
{
uint32 LR;
uint32 CTR;
@@ -67,10 +67,9 @@ struct PPCInterpreter_t
uint32 reservedMemValue;
// temporary storage for recompiler
FPR_t temporaryFPR[8];
- uint32 temporaryGPR[4]; // deprecated, refactor backend dependency on this away
- uint32 temporaryGPR_reg[4];
+ uint32 temporaryGPR[4];
// values below this are not used by Cafe OS usermode
- struct
+ struct
{
uint32 fpecr; // is this the same register as fpscr ?
uint32 DEC;
@@ -85,7 +84,7 @@ struct PPCInterpreter_t
// DMA
uint32 dmaU;
uint32 dmaL;
- // MMU
+ // MMU
uint32 dbatU[8];
uint32 dbatL[8];
uint32 ibatU[8];
@@ -93,8 +92,6 @@ struct PPCInterpreter_t
uint32 sr[16];
uint32 sdr1;
}sprExtended;
- uint8 LSQE;
- uint8 PSE;
// global CPU values
PPCInterpreterGlobal_t* global;
// interpreter control
@@ -230,9 +227,9 @@ static inline float flushDenormalToZero(float f)
// HLE interface
-using HLECALL = void(*)(PPCInterpreter_t*);
-using HLEIDX = sint32;
+typedef void(*HLECALL)(PPCInterpreter_t* hCPU);
+typedef sint32 HLEIDX;
HLEIDX PPCInterpreter_registerHLECall(HLECALL hleCall, std::string hleName);
HLECALL PPCInterpreter_getHLECall(HLEIDX funcIndex);
diff --git a/src/Cafe/HW/Espresso/PPCTimer.cpp b/src/Cafe/HW/Espresso/PPCTimer.cpp
index 257973a6..c27c94ee 100644
--- a/src/Cafe/HW/Espresso/PPCTimer.cpp
+++ b/src/Cafe/HW/Espresso/PPCTimer.cpp
@@ -1,4 +1,5 @@
#include "Cafe/HW/Espresso/Const.h"
+#include "asm/x64util.h"
#include "config/ActiveSettings.h"
#include "util/helpers/fspinlock.h"
#include "util/highresolutiontimer/HighResolutionTimer.h"
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp
deleted file mode 100644
index 728460a4..00000000
--- a/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp
+++ /dev/null
@@ -1,1695 +0,0 @@
-#include "BackendAArch64.h"
-
-#pragma push_macro("CSIZE")
-#undef CSIZE
-#include
-#pragma pop_macro("CSIZE")
-#include
-
-#include
-
-#include "../PPCRecompiler.h"
-#include "Common/precompiled.h"
-#include "Common/cpu_features.h"
-#include "HW/Espresso/Interpreter/PPCInterpreterInternal.h"
-#include "HW/Espresso/Interpreter/PPCInterpreterHelper.h"
-#include "HW/Espresso/PPCState.h"
-
-using namespace Xbyak_aarch64;
-
-constexpr uint32 TEMP_GPR_1_ID = 25;
-constexpr uint32 TEMP_GPR_2_ID = 26;
-constexpr uint32 PPC_RECOMPILER_INSTANCE_DATA_REG_ID = 27;
-constexpr uint32 MEMORY_BASE_REG_ID = 28;
-constexpr uint32 HCPU_REG_ID = 29;
-
-constexpr uint32 TEMP_FPR_ID = 31;
-
-struct FPReg
-{
- explicit FPReg(size_t index)
- : index(index), VReg(index), QReg(index), DReg(index), SReg(index), HReg(index), BReg(index)
- {
- }
- const size_t index;
- const VReg VReg;
- const QReg QReg;
- const DReg DReg;
- const SReg SReg;
- const HReg HReg;
- const BReg BReg;
-};
-
-struct GPReg
-{
- explicit GPReg(size_t index)
- : index(index), XReg(index), WReg(index)
- {
- }
- const size_t index;
- const XReg XReg;
- const WReg WReg;
-};
-
-static const XReg HCPU_REG{HCPU_REG_ID}, PPC_REC_INSTANCE_REG{PPC_RECOMPILER_INSTANCE_DATA_REG_ID}, MEM_BASE_REG{MEMORY_BASE_REG_ID};
-static const GPReg TEMP_GPR1{TEMP_GPR_1_ID};
-static const GPReg TEMP_GPR2{TEMP_GPR_2_ID};
-static const GPReg LR{TEMP_GPR_2_ID};
-
-static const FPReg TEMP_FPR{TEMP_FPR_ID};
-
-static const util::Cpu s_cpu;
-
-class AArch64Allocator : public Allocator
-{
- private:
-#ifdef XBYAK_USE_MMAP_ALLOCATOR
- inline static MmapAllocator s_allocator;
-#else
- inline static Allocator s_allocator;
-#endif
- Allocator* m_allocatorImpl;
- bool m_freeDisabled = false;
-
- public:
- AArch64Allocator()
- : m_allocatorImpl(reinterpret_cast<Allocator*>(&s_allocator)) {}
-
- uint32* alloc(size_t size) override
- {
- return m_allocatorImpl->alloc(size);
- }
-
- void setFreeDisabled(bool disabled)
- {
- m_freeDisabled = disabled;
- }
-
- void free(uint32* p) override
- {
- if (!m_freeDisabled)
- m_allocatorImpl->free(p);
- }
-
- [[nodiscard]] bool useProtect() const override
- {
- return !m_freeDisabled && m_allocatorImpl->useProtect();
- }
-};
-
-struct UnconditionalJumpInfo
-{
- IMLSegment* target;
-};
-
-struct ConditionalRegJumpInfo
-{
- IMLSegment* target;
- WReg regBool;
- bool mustBeTrue;
-};
-
-struct NegativeRegValueJumpInfo
-{
- IMLSegment* target;
- WReg regValue;
-};
-
-using JumpInfo = std::variant<
- UnconditionalJumpInfo,
- ConditionalRegJumpInfo,
- NegativeRegValueJumpInfo>;
-
-struct AArch64GenContext_t : CodeGenerator
-{
- explicit AArch64GenContext_t(Allocator* allocator = nullptr);
- void enterRecompilerCode();
- void leaveRecompilerCode();
-
- void r_name(IMLInstruction* imlInstruction);
- void name_r(IMLInstruction* imlInstruction);
- bool r_s32(IMLInstruction* imlInstruction);
- bool r_r(IMLInstruction* imlInstruction);
- bool r_r_s32(IMLInstruction* imlInstruction);
- bool r_r_s32_carry(IMLInstruction* imlInstruction);
- bool r_r_r(IMLInstruction* imlInstruction);
- bool r_r_r_carry(IMLInstruction* imlInstruction);
- void compare(IMLInstruction* imlInstruction);
- void compare_s32(IMLInstruction* imlInstruction);
- bool load(IMLInstruction* imlInstruction, bool indexed);
- bool store(IMLInstruction* imlInstruction, bool indexed);
- void atomic_cmp_store(IMLInstruction* imlInstruction);
- bool macro(IMLInstruction* imlInstruction);
- void call_imm(IMLInstruction* imlInstruction);
- bool fpr_load(IMLInstruction* imlInstruction, bool indexed);
- bool fpr_store(IMLInstruction* imlInstruction, bool indexed);
- void fpr_r_r(IMLInstruction* imlInstruction);
- void fpr_r_r_r(IMLInstruction* imlInstruction);
- void fpr_r_r_r_r(IMLInstruction* imlInstruction);
- void fpr_r(IMLInstruction* imlInstruction);
- void fpr_compare(IMLInstruction* imlInstruction);
- void cjump(IMLInstruction* imlInstruction, IMLSegment* imlSegment);
- void jump(IMLSegment* imlSegment);
- void conditionalJumpCycleCheck(IMLSegment* imlSegment);
-
- static constexpr size_t MAX_JUMP_INSTR_COUNT = 2;
- std::list<std::pair<size_t, JumpInfo>> jumps;
- void prepareJump(JumpInfo&& jumpInfo)
- {
- jumps.emplace_back(getSize(), jumpInfo);
- for (int i = 0; i < MAX_JUMP_INSTR_COUNT; ++i)
- nop();
- }
-
- std::map<IMLSegment*, size_t> segmentStarts;
- void storeSegmentStart(IMLSegment* imlSegment)
- {
- segmentStarts[imlSegment] = getSize();
- }
-
- bool processAllJumps()
- {
- for (auto jump : jumps)
- {
- auto jumpStart = jump.first;
- auto jumpInfo = jump.second;
- bool success = std::visit(
- [&, this](const auto& jump) {
- setSize(jumpStart);
- sint64 targetAddress = segmentStarts.at(jump.target);
- sint64 addressOffset = targetAddress - jumpStart;
- return handleJump(addressOffset, jump);
- },
- jumpInfo);
- if (!success)
- {
- return false;
- }
- }
- return true;
- }
-
- bool handleJump(sint64 addressOffset, const UnconditionalJumpInfo& jump)
- {
- // in +/-128MB
- if (-0x8000000 <= addressOffset && addressOffset <= 0x7ffffff)
- {
- b(addressOffset);
- return true;
- }
-
- cemu_assert_suspicious();
-
- return false;
- }
-
- bool handleJump(sint64 addressOffset, const ConditionalRegJumpInfo& jump)
- {
- bool mustBeTrue = jump.mustBeTrue;
-
- // in +/-32KB
- if (-0x8000 <= addressOffset && addressOffset <= 0x7fff)
- {
- if (mustBeTrue)
- tbnz(jump.regBool, 0, addressOffset);
- else
- tbz(jump.regBool, 0, addressOffset);
- return true;
- }
-
- // in +/-1MB
- if (-0x100000 <= addressOffset && addressOffset <= 0xfffff)
- {
- if (mustBeTrue)
- cbnz(jump.regBool, addressOffset);
- else
- cbz(jump.regBool, addressOffset);
- return true;
- }
-
- Label skipJump;
- if (mustBeTrue)
- tbz(jump.regBool, 0, skipJump);
- else
- tbnz(jump.regBool, 0, skipJump);
- addressOffset -= 4;
-
- // in +/-128MB
- if (-0x8000000 <= addressOffset && addressOffset <= 0x7ffffff)
- {
- b(addressOffset);
- L(skipJump);
- return true;
- }
-
- cemu_assert_suspicious();
-
- return false;
- }
-
- bool handleJump(sint64 addressOffset, const NegativeRegValueJumpInfo& jump)
- {
- // in +/-32KB
- if (-0x8000 <= addressOffset && addressOffset <= 0x7fff)
- {
- tbnz(jump.regValue, 31, addressOffset);
- return true;
- }
-
- // in +/-1MB
- if (-0x100000 <= addressOffset && addressOffset <= 0xfffff)
- {
- tst(jump.regValue, 0x80000000);
- addressOffset -= 4;
- bne(addressOffset);
- return true;
- }
-
- Label skipJump;
- tbz(jump.regValue, 31, skipJump);
- addressOffset -= 4;
-
- // in +/-128MB
- if (-0x8000000 <= addressOffset && addressOffset <= 0x7ffffff)
- {
- b(addressOffset);
- L(skipJump);
- return true;
- }
-
- cemu_assert_suspicious();
-
- return false;
- }
-};
-
-template T>
-T fpReg(const IMLReg& imlReg)
-{
- cemu_assert_debug(imlReg.GetRegFormat() == IMLRegFormat::F64);
- auto regId = imlReg.GetRegID();
- cemu_assert_debug(regId >= IMLArchAArch64::PHYSREG_FPR_BASE && regId < IMLArchAArch64::PHYSREG_FPR_BASE + IMLArchAArch64::PHYSREG_FPR_COUNT);
- return T(regId - IMLArchAArch64::PHYSREG_FPR_BASE);
-}
-
-template T>
-T gpReg(const IMLReg& imlReg)
-{
- auto regFormat = imlReg.GetRegFormat();
- if (std::is_same_v<T, WReg>)
- cemu_assert_debug(regFormat == IMLRegFormat::I32);
- else if (std::is_same_v<T, XReg>)
- cemu_assert_debug(regFormat == IMLRegFormat::I64);
- else
- cemu_assert_unimplemented();
-
- auto regId = imlReg.GetRegID();
- cemu_assert_debug(regId >= IMLArchAArch64::PHYSREG_GPR_BASE && regId < IMLArchAArch64::PHYSREG_GPR_BASE + IMLArchAArch64::PHYSREG_GPR_COUNT);
- return T(regId - IMLArchAArch64::PHYSREG_GPR_BASE);
-}
-
-template To, std::derived_from From>
-To aliasAs(const From& reg)
-{
- return To(reg.getIdx());
-}
-
-template To, std::derived_from From>
-To aliasAs(const From& reg)
-{
- return To(reg.getIdx());
-}
-
-AArch64GenContext_t::AArch64GenContext_t(Allocator* allocator)
- : CodeGenerator(DEFAULT_MAX_CODE_SIZE, AutoGrow, allocator)
-{
-}
-
-constexpr uint64 ones(uint32 size)
-{
- return (size == 64) ? 0xffffffffffffffff : ((uint64)1 << size) - 1;
-}
-
-constexpr bool isAdrImmValidFPR(sint32 imm, uint32 bits)
-{
- uint32 times = bits / 8;
- uint32 sh = std::countr_zero(times);
- return (0 <= imm && imm <= 4095 * times) && ((uint64)imm & ones(sh)) == 0;
-}
-
-constexpr bool isAdrImmValidGPR(sint32 imm, uint32 bits = 32)
-{
- uint32 size = std::countr_zero(bits / 8u);
- sint32 times = 1 << size;
- return (0 <= imm && imm <= 4095 * times) && ((uint64)imm & ones(size)) == 0;
-}
-
-constexpr bool isAdrImmRangeValid(sint32 rangeStart, sint32 rangeOffset, sint32 bits, std::invocable<sint32, uint32> auto check)
-{
- for (sint32 i = rangeStart; i <= rangeStart + rangeOffset; i += bits / 8)
- if (!check(i, bits))
- return false;
- return true;
-}
-
-constexpr bool isAdrImmRangeValidGPR(sint32 rangeStart, sint32 rangeOffset, sint32 bits = 32)
-{
- return isAdrImmRangeValid(rangeStart, rangeOffset, bits, isAdrImmValidGPR);
-}
-
-constexpr bool isAdrImmRangeValidFpr(sint32 rangeStart, sint32 rangeOffset, sint32 bits)
-{
- return isAdrImmRangeValid(rangeStart, rangeOffset, bits, isAdrImmValidFPR);
-}
-
-// Verify that all of the offsets for the PPCInterpreter_t members that we use in r_name/name_r have a valid imm value for AdrUimm
-static_assert(isAdrImmRangeValidGPR(offsetof(PPCInterpreter_t, gpr), sizeof(uint32) * 31));
-static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, spr.LR)));
-static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, spr.CTR)));
-static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, spr.XER)));
-static_assert(isAdrImmRangeValidGPR(offsetof(PPCInterpreter_t, spr.UGQR), sizeof(PPCInterpreter_t::spr.UGQR[0]) * (SPR_UGQR7 - SPR_UGQR0)));
-static_assert(isAdrImmRangeValidGPR(offsetof(PPCInterpreter_t, temporaryGPR_reg), sizeof(uint32) * 3));
-static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, xer_ca), 8));
-static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, xer_so), 8));
-static_assert(isAdrImmRangeValidGPR(offsetof(PPCInterpreter_t, cr), PPCREC_NAME_CR_LAST - PPCREC_NAME_CR, 8));
-static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, reservedMemAddr)));
-static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, reservedMemValue)));
-static_assert(isAdrImmRangeValidFpr(offsetof(PPCInterpreter_t, fpr), sizeof(FPR_t) * 63, 64));
-static_assert(isAdrImmRangeValidFpr(offsetof(PPCInterpreter_t, temporaryFPR), sizeof(FPR_t) * 7, 128));
-
-void AArch64GenContext_t::r_name(IMLInstruction* imlInstruction)
-{
- uint32 name = imlInstruction->op_r_name.name;
-
- if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::I64)
- {
- XReg regRXReg = gpReg<XReg>(imlInstruction->op_r_name.regR);
- WReg regR = aliasAs<WReg>(regRXReg);
- if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32)
- {
- ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0)));
- }
- else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999)
- {
- uint32 sprIndex = (name - PPCREC_NAME_SPR0);
- if (sprIndex == SPR_LR)
- ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.LR)));
- else if (sprIndex == SPR_CTR)
- ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.CTR)));
- else if (sprIndex == SPR_XER)
- ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.XER)));
- else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
- ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0)));
- else
- cemu_assert_suspicious();
- }
- else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
- {
- ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)));
- }
- else if (name == PPCREC_NAME_XER_CA)
- {
- ldrb(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, xer_ca)));
- }
- else if (name == PPCREC_NAME_XER_SO)
- {
- ldrb(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, xer_so)));
- }
- else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
- {
- ldrb(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR)));
- }
- else if (name == PPCREC_NAME_CPU_MEMRES_EA)
- {
- ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, reservedMemAddr)));
- }
- else if (name == PPCREC_NAME_CPU_MEMRES_VAL)
- {
- ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, reservedMemValue)));
- }
- else
- {
- cemu_assert_suspicious();
- }
- }
- else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64)
- {
- auto imlRegR = imlInstruction->op_r_name.regR;
-
- if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64))
- {
- uint32 regIndex = (name - PPCREC_NAME_FPR_HALF) / 2;
- uint32 pairIndex = (name - PPCREC_NAME_FPR_HALF) % 2;
- uint32 offset = offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + (pairIndex ? sizeof(double) : 0);
- ldr(fpReg<DReg>(imlRegR), AdrUimm(HCPU_REG, offset));
- }
- else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
- {
- ldr(fpReg<QReg>(imlRegR), AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0)));
- }
- else
- {
- cemu_assert_suspicious();
- }
- }
- else
- {
- cemu_assert_suspicious();
- }
-}
-
-void AArch64GenContext_t::name_r(IMLInstruction* imlInstruction)
-{
- uint32 name = imlInstruction->op_r_name.name;
-
- if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::I64)
- {
- XReg regRXReg = gpReg<XReg>(imlInstruction->op_r_name.regR);
- WReg regR = aliasAs<WReg>(regRXReg);
- if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32)
- {
- str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0)));
- }
- else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999)
- {
- uint32 sprIndex = (name - PPCREC_NAME_SPR0);
- if (sprIndex == SPR_LR)
- str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.LR)));
- else if (sprIndex == SPR_CTR)
- str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.CTR)));
- else if (sprIndex == SPR_XER)
- str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.XER)));
- else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
- str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0)));
- else
- cemu_assert_suspicious();
- }
- else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
- {
- str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)));
- }
- else if (name == PPCREC_NAME_XER_CA)
- {
- strb(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, xer_ca)));
- }
- else if (name == PPCREC_NAME_XER_SO)
- {
- strb(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, xer_so)));
- }
- else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
- {
- strb(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR)));
- }
- else if (name == PPCREC_NAME_CPU_MEMRES_EA)
- {
- str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, reservedMemAddr)));
- }
- else if (name == PPCREC_NAME_CPU_MEMRES_VAL)
- {
- str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, reservedMemValue)));
- }
- else
- {
- cemu_assert_suspicious();
- }
- }
- else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64)
- {
- auto imlRegR = imlInstruction->op_r_name.regR;
- if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64))
- {
- uint32 regIndex = (name - PPCREC_NAME_FPR_HALF) / 2;
- uint32 pairIndex = (name - PPCREC_NAME_FPR_HALF) % 2;
- sint32 offset = offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + pairIndex * sizeof(double);
- str(fpReg<DReg>(imlRegR), AdrUimm(HCPU_REG, offset));
- }
- else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
- {
- str(fpReg<QReg>(imlRegR), AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0)));
- }
- else
- {
- cemu_assert_suspicious();
- }
- }
- else
- {
- cemu_assert_suspicious();
- }
-}
-
-bool AArch64GenContext_t::r_r(IMLInstruction* imlInstruction)
-{
- WReg regR = gpReg<WReg>(imlInstruction->op_r_r.regR);
- WReg regA = gpReg<WReg>(imlInstruction->op_r_r.regA);
-
- if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)
- {
- mov(regR, regA);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP)
- {
- rev(regR, regA);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32)
- {
- sxtb(regR, regA);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32)
- {
- sxth(regR, regA);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_NOT)
- {
- mvn(regR, regA);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_NEG)
- {
- neg(regR, regA);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_CNTLZW)
- {
- clz(regR, regA);
- }
- else
- {
- cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_r_r(): Unsupported operation {:x}", imlInstruction->operation);
- return false;
- }
- return true;
-}
-
-bool AArch64GenContext_t::r_s32(IMLInstruction* imlInstruction)
-{
- sint32 imm32 = imlInstruction->op_r_immS32.immS32;
- WReg reg = gpReg<WReg>(imlInstruction->op_r_immS32.regR);
-
- if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)
- {
- mov(reg, imm32);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE)
- {
- ror(reg, reg, 32 - (imm32 & 0x1f));
- }
- else
- {
- cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_r_s32(): Unsupported operation {:x}", imlInstruction->operation);
- return false;
- }
- return true;
-}
-
-bool AArch64GenContext_t::r_r_s32(IMLInstruction* imlInstruction)
-{
- WReg regR = gpReg<WReg>(imlInstruction->op_r_r_s32.regR);
- WReg regA = gpReg<WReg>(imlInstruction->op_r_r_s32.regA);
- sint32 immS32 = imlInstruction->op_r_r_s32.immS32;
-
- if (imlInstruction->operation == PPCREC_IML_OP_ADD)
- {
- add_imm(regR, regA, immS32, TEMP_GPR1.WReg);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_SUB)
- {
- sub_imm(regR, regA, immS32, TEMP_GPR1.WReg);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_AND)
- {
- mov(TEMP_GPR1.WReg, immS32);
- and_(regR, regA, TEMP_GPR1.WReg);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_OR)
- {
- mov(TEMP_GPR1.WReg, immS32);
- orr(regR, regA, TEMP_GPR1.WReg);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_XOR)
- {
- mov(TEMP_GPR1.WReg, immS32);
- eor(regR, regA, TEMP_GPR1.WReg);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED)
- {
- mov(TEMP_GPR1.WReg, immS32);
- mul(regR, regA, TEMP_GPR1.WReg);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
- {
- lsl(regR, regA, (uint32)immS32 & 0x1f);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
- {
- lsr(regR, regA, (uint32)immS32 & 0x1f);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
- {
- asr(regR, regA, (uint32)immS32 & 0x1f);
- }
- else
- {
- cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_r_r_s32(): Unsupported operation {:x}", imlInstruction->operation);
- cemu_assert_suspicious();
- return false;
- }
- return true;
-}
-
-bool AArch64GenContext_t::r_r_s32_carry(IMLInstruction* imlInstruction)
-{
- WReg regR = gpReg<WReg>(imlInstruction->op_r_r_s32_carry.regR);
- WReg regA = gpReg<WReg>(imlInstruction->op_r_r_s32_carry.regA);
- WReg regCarry = gpReg<WReg>(imlInstruction->op_r_r_s32_carry.regCarry);
-
- sint32 immS32 = imlInstruction->op_r_r_s32_carry.immS32;
- if (imlInstruction->operation == PPCREC_IML_OP_ADD)
- {
- adds_imm(regR, regA, immS32, TEMP_GPR1.WReg);
- cset(regCarry, Cond::CS);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_ADD_WITH_CARRY)
- {
- mov(TEMP_GPR1.WReg, immS32);
- cmp(regCarry, 1);
- adcs(regR, regA, TEMP_GPR1.WReg);
- cset(regCarry, Cond::CS);
- }
- else
- {
- cemu_assert_suspicious();
- return false;
- }
-
- return true;
-}
-
-bool AArch64GenContext_t::r_r_r(IMLInstruction* imlInstruction)
-{
- WReg regResult = gpReg<WReg>(imlInstruction->op_r_r_r.regR);
- XReg reg64Result = aliasAs<XReg>(regResult);
- WReg regOperand1 = gpReg<WReg>(imlInstruction->op_r_r_r.regA);
- WReg regOperand2 = gpReg<WReg>(imlInstruction->op_r_r_r.regB);
-
- if (imlInstruction->operation == PPCREC_IML_OP_ADD)
- {
- add(regResult, regOperand1, regOperand2);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_SUB)
- {
- sub(regResult, regOperand1, regOperand2);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_OR)
- {
- orr(regResult, regOperand1, regOperand2);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_AND)
- {
- and_(regResult, regOperand1, regOperand2);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_XOR)
- {
- eor(regResult, regOperand1, regOperand2);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED)
- {
- mul(regResult, regOperand1, regOperand2);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_SLW)
- {
- tst(regOperand2, 32);
- lsl(regResult, regOperand1, regOperand2);
- csel(regResult, regResult, wzr, Cond::EQ);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_SRW)
- {
- tst(regOperand2, 32);
- lsr(regResult, regOperand1, regOperand2);
- csel(regResult, regResult, wzr, Cond::EQ);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE)
- {
- neg(TEMP_GPR1.WReg, regOperand2);
- ror(regResult, regOperand1, TEMP_GPR1.WReg);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
- {
- asr(regResult, regOperand1, regOperand2);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
- {
- lsr(regResult, regOperand1, regOperand2);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
- {
- lsl(regResult, regOperand1, regOperand2);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED)
- {
- sdiv(regResult, regOperand1, regOperand2);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED)
- {
- udiv(regResult, regOperand1, regOperand2);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED)
- {
- smull(reg64Result, regOperand1, regOperand2);
- lsr(reg64Result, reg64Result, 32);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED)
- {
- umull(reg64Result, regOperand1, regOperand2);
- lsr(reg64Result, reg64Result, 32);
- }
- else
- {
- cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_r_r_r(): Unsupported operation {:x}", imlInstruction->operation);
- return false;
- }
- return true;
-}
-
-bool AArch64GenContext_t::r_r_r_carry(IMLInstruction* imlInstruction)
-{
- WReg regR = gpReg<WReg>(imlInstruction->op_r_r_r_carry.regR);
- WReg regA = gpReg<WReg>(imlInstruction->op_r_r_r_carry.regA);
- WReg regB = gpReg<WReg>(imlInstruction->op_r_r_r_carry.regB);
- WReg regCarry = gpReg<WReg>(imlInstruction->op_r_r_r_carry.regCarry);
-
- if (imlInstruction->operation == PPCREC_IML_OP_ADD)
- {
- adds(regR, regA, regB);
- cset(regCarry, Cond::CS);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_ADD_WITH_CARRY)
- {
- cmp(regCarry, 1);
- adcs(regR, regA, regB);
- cset(regCarry, Cond::CS);
- }
- else
- {
- cemu_assert_suspicious();
- return false;
- }
-
- return true;
-}
-
-Cond ImlCondToArm64Cond(IMLCondition condition)
-{
- switch (condition)
- {
- case IMLCondition::EQ:
- return Cond::EQ;
- case IMLCondition::NEQ:
- return Cond::NE;
- case IMLCondition::UNSIGNED_GT:
- return Cond::HI;
- case IMLCondition::UNSIGNED_LT:
- return Cond::LO;
- case IMLCondition::SIGNED_GT:
- return Cond::GT;
- case IMLCondition::SIGNED_LT:
- return Cond::LT;
- default:
- {
- cemu_assert_suspicious();
- return Cond::EQ;
- }
- }
-}
-
-void AArch64GenContext_t::compare(IMLInstruction* imlInstruction)
-{
- WReg regR = gpReg<WReg>(imlInstruction->op_compare.regR);
- WReg regA = gpReg<WReg>(imlInstruction->op_compare.regA);
- WReg regB = gpReg<WReg>(imlInstruction->op_compare.regB);
- Cond cond = ImlCondToArm64Cond(imlInstruction->op_compare.cond);
- cmp(regA, regB);
- cset(regR, cond);
-}
-
-void AArch64GenContext_t::compare_s32(IMLInstruction* imlInstruction)
-{
- WReg regR = gpReg<WReg>(imlInstruction->op_compare.regR);
- WReg regA = gpReg<WReg>(imlInstruction->op_compare.regA);
- sint32 imm = imlInstruction->op_compare_s32.immS32;
- auto cond = ImlCondToArm64Cond(imlInstruction->op_compare.cond);
- cmp_imm(regA, imm, TEMP_GPR1.WReg);
- cset(regR, cond);
-}
-
-void AArch64GenContext_t::cjump(IMLInstruction* imlInstruction, IMLSegment* imlSegment)
-{
- auto regBool = gpReg<WReg>(imlInstruction->op_conditional_jump.registerBool);
- prepareJump(ConditionalRegJumpInfo{
- .target = imlSegment->nextSegmentBranchTaken,
- .regBool = regBool,
- .mustBeTrue = imlInstruction->op_conditional_jump.mustBeTrue,
- });
-}
-
-void AArch64GenContext_t::jump(IMLSegment* imlSegment)
-{
- prepareJump(UnconditionalJumpInfo{.target = imlSegment->nextSegmentBranchTaken});
-}
-
-void AArch64GenContext_t::conditionalJumpCycleCheck(IMLSegment* imlSegment)
-{
- ldr(TEMP_GPR1.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, remainingCycles)));
- prepareJump(NegativeRegValueJumpInfo{
- .target = imlSegment->nextSegmentBranchTaken,
- .regValue = TEMP_GPR1.WReg,
- });
-}
-
-void* PPCRecompiler_virtualHLE(PPCInterpreter_t* ppcInterpreter, uint32 hleFuncId)
-{
- void* prevRSPTemp = ppcInterpreter->rspTemp;
- if (hleFuncId == 0xFFD0)
- {
- ppcInterpreter->remainingCycles -= 500; // let subtract about 500 cycles for each HLE call
- ppcInterpreter->gpr[3] = 0;
- PPCInterpreter_nextInstruction(ppcInterpreter);
- return PPCInterpreter_getCurrentInstance();
- }
- else
- {
- auto hleCall = PPCInterpreter_getHLECall(hleFuncId);
- cemu_assert(hleCall != nullptr);
- hleCall(ppcInterpreter);
- }
- ppcInterpreter->rspTemp = prevRSPTemp;
- return PPCInterpreter_getCurrentInstance();
-}
-
-bool AArch64GenContext_t::macro(IMLInstruction* imlInstruction)
-{
- if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG)
- {
- WReg branchDstReg = gpReg(imlInstruction->op_macro.paramReg);
-
- mov(TEMP_GPR1.WReg, offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
- add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, branchDstReg, ShMod::LSL, 1);
- ldr(TEMP_GPR1.XReg, AdrExt(PPC_REC_INSTANCE_REG, TEMP_GPR1.WReg, ExtMod::UXTW));
- mov(LR.WReg, branchDstReg);
- br(TEMP_GPR1.XReg);
- return true;
- }
- else if (imlInstruction->operation == PPCREC_IML_MACRO_BL)
- {
- uint32 newLR = imlInstruction->op_macro.param + 4;
-
- mov(TEMP_GPR1.WReg, newLR);
- str(TEMP_GPR1.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.LR)));
-
- uint32 newIP = imlInstruction->op_macro.param2;
- uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL;
- mov(TEMP_GPR1.XReg, lookupOffset);
- ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg));
- mov(LR.WReg, newIP);
- br(TEMP_GPR1.XReg);
- return true;
- }
- else if (imlInstruction->operation == PPCREC_IML_MACRO_B_FAR)
- {
- uint32 newIP = imlInstruction->op_macro.param2;
- uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL;
- mov(TEMP_GPR1.XReg, lookupOffset);
- ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg));
- mov(LR.WReg, newIP);
- br(TEMP_GPR1.XReg);
- return true;
- }
- else if (imlInstruction->operation == PPCREC_IML_MACRO_LEAVE)
- {
- uint32 currentInstructionAddress = imlInstruction->op_macro.param;
- mov(TEMP_GPR1.XReg, (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); // newIP = 0 special value for recompiler exit
- ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg));
- mov(LR.WReg, currentInstructionAddress);
- br(TEMP_GPR1.XReg);
- return true;
- }
- else if (imlInstruction->operation == PPCREC_IML_MACRO_DEBUGBREAK)
- {
- brk(0xf000);
- return true;
- }
- else if (imlInstruction->operation == PPCREC_IML_MACRO_COUNT_CYCLES)
- {
- uint32 cycleCount = imlInstruction->op_macro.param;
- AdrUimm adrCycles = AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, remainingCycles));
- ldr(TEMP_GPR1.WReg, adrCycles);
- sub_imm(TEMP_GPR1.WReg, TEMP_GPR1.WReg, cycleCount, TEMP_GPR2.WReg);
- str(TEMP_GPR1.WReg, adrCycles);
- return true;
- }
- else if (imlInstruction->operation == PPCREC_IML_MACRO_HLE)
- {
- uint32 ppcAddress = imlInstruction->op_macro.param;
- uint32 funcId = imlInstruction->op_macro.param2;
- Label cyclesLeftLabel;
-
- // update instruction pointer
- mov(TEMP_GPR1.WReg, ppcAddress);
- str(TEMP_GPR1.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, instructionPointer)));
- // set parameters
- str(x30, AdrPreImm(sp, -16));
-
- mov(x0, HCPU_REG);
- mov(w1, funcId);
- // call HLE function
-
- mov(TEMP_GPR1.XReg, (uint64)PPCRecompiler_virtualHLE);
- blr(TEMP_GPR1.XReg);
-
- mov(HCPU_REG, x0);
-
- ldr(x30, AdrPostImm(sp, 16));
-
- // check if cycles where decreased beyond zero, if yes -> leave recompiler
- ldr(TEMP_GPR1.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, remainingCycles)));
- tbz(TEMP_GPR1.WReg, 31, cyclesLeftLabel); // check if negative
-
- mov(TEMP_GPR1.XReg, offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
- ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg));
- ldr(LR.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, instructionPointer)));
- // branch to recompiler exit
- br(TEMP_GPR1.XReg);
-
- L(cyclesLeftLabel);
- // check if instruction pointer was changed
- // assign new instruction pointer to LR.WReg
- ldr(LR.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, instructionPointer)));
- mov(TEMP_GPR1.XReg, offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
- add(TEMP_GPR1.XReg, TEMP_GPR1.XReg, LR.XReg, ShMod::LSL, 1);
- ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg));
- // branch to [ppcRecompilerDirectJumpTable + PPCInterpreter_t::instructionPointer * 2]
- br(TEMP_GPR1.XReg);
- return true;
- }
- else
- {
- cemuLog_log(LogType::Recompiler, "Unknown recompiler macro operation %d\n", imlInstruction->operation);
- cemu_assert_suspicious();
- }
- return false;
-}
-
-bool AArch64GenContext_t::load(IMLInstruction* imlInstruction, bool indexed)
-{
- cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32);
- cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32);
- if (indexed)
- cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32);
-
- sint32 memOffset = imlInstruction->op_storeLoad.immS32;
- bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend;
- bool switchEndian = imlInstruction->op_storeLoad.flags2.swapEndian;
- WReg memReg = gpReg(imlInstruction->op_storeLoad.registerMem);
- WReg dataReg = gpReg(imlInstruction->op_storeLoad.registerData);
-
- add_imm(TEMP_GPR1.WReg, memReg, memOffset, TEMP_GPR1.WReg);
- if (indexed)
- add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, gpReg(imlInstruction->op_storeLoad.registerMem2));
-
- auto adr = AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW);
- if (imlInstruction->op_storeLoad.copyWidth == 32)
- {
- ldr(dataReg, adr);
- if (switchEndian)
- rev(dataReg, dataReg);
- }
- else if (imlInstruction->op_storeLoad.copyWidth == 16)
- {
- if (switchEndian)
- {
- ldrh(dataReg, adr);
- rev(dataReg, dataReg);
- if (signExtend)
- asr(dataReg, dataReg, 16);
- else
- lsr(dataReg, dataReg, 16);
- }
- else
- {
- if (signExtend)
- ldrsh(dataReg, adr);
- else
- ldrh(dataReg, adr);
- }
- }
- else if (imlInstruction->op_storeLoad.copyWidth == 8)
- {
- if (signExtend)
- ldrsb(dataReg, adr);
- else
- ldrb(dataReg, adr);
- }
- else
- {
- return false;
- }
- return true;
-}
-
-bool AArch64GenContext_t::store(IMLInstruction* imlInstruction, bool indexed)
-{
- cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32);
- cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32);
- if (indexed)
- cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32);
-
- WReg dataReg = gpReg(imlInstruction->op_storeLoad.registerData);
- WReg memReg = gpReg(imlInstruction->op_storeLoad.registerMem);
- sint32 memOffset = imlInstruction->op_storeLoad.immS32;
- bool swapEndian = imlInstruction->op_storeLoad.flags2.swapEndian;
-
- add_imm(TEMP_GPR1.WReg, memReg, memOffset, TEMP_GPR1.WReg);
- if (indexed)
- add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, gpReg(imlInstruction->op_storeLoad.registerMem2));
- AdrExt adr = AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW);
- if (imlInstruction->op_storeLoad.copyWidth == 32)
- {
- if (swapEndian)
- {
- rev(TEMP_GPR2.WReg, dataReg);
- str(TEMP_GPR2.WReg, adr);
- }
- else
- {
- str(dataReg, adr);
- }
- }
- else if (imlInstruction->op_storeLoad.copyWidth == 16)
- {
- if (swapEndian)
- {
- rev(TEMP_GPR2.WReg, dataReg);
- lsr(TEMP_GPR2.WReg, TEMP_GPR2.WReg, 16);
- strh(TEMP_GPR2.WReg, adr);
- }
- else
- {
- strh(dataReg, adr);
- }
- }
- else if (imlInstruction->op_storeLoad.copyWidth == 8)
- {
- strb(dataReg, adr);
- }
- else
- {
- return false;
- }
- return true;
-}
-
-void AArch64GenContext_t::atomic_cmp_store(IMLInstruction* imlInstruction)
-{
- WReg outReg = gpReg(imlInstruction->op_atomic_compare_store.regBoolOut);
- WReg eaReg = gpReg(imlInstruction->op_atomic_compare_store.regEA);
- WReg valReg = gpReg(imlInstruction->op_atomic_compare_store.regWriteValue);
- WReg cmpValReg = gpReg(imlInstruction->op_atomic_compare_store.regCompareValue);
-
- if (s_cpu.isAtomicSupported())
- {
- mov(TEMP_GPR2.WReg, cmpValReg);
- add(TEMP_GPR1.XReg, MEM_BASE_REG, eaReg, ExtMod::UXTW);
- casal(TEMP_GPR2.WReg, valReg, AdrNoOfs(TEMP_GPR1.XReg));
- cmp(TEMP_GPR2.WReg, cmpValReg);
- cset(outReg, Cond::EQ);
- }
- else
- {
- Label notEqual;
- Label storeFailed;
-
- add(TEMP_GPR1.XReg, MEM_BASE_REG, eaReg, ExtMod::UXTW);
- L(storeFailed);
- ldaxr(TEMP_GPR2.WReg, AdrNoOfs(TEMP_GPR1.XReg));
- cmp(TEMP_GPR2.WReg, cmpValReg);
- bne(notEqual);
- stlxr(TEMP_GPR2.WReg, valReg, AdrNoOfs(TEMP_GPR1.XReg));
- cbnz(TEMP_GPR2.WReg, storeFailed);
-
- L(notEqual);
- cset(outReg, Cond::EQ);
- }
-}
-
-bool AArch64GenContext_t::fpr_load(IMLInstruction* imlInstruction, bool indexed)
-{
- const IMLReg& dataReg = imlInstruction->op_storeLoad.registerData;
- SReg dataSReg = fpReg(dataReg);
- DReg dataDReg = fpReg(dataReg);
- WReg realRegisterMem = gpReg(imlInstruction->op_storeLoad.registerMem);
- WReg indexReg = indexed ? gpReg(imlInstruction->op_storeLoad.registerMem2) : wzr;
- sint32 adrOffset = imlInstruction->op_storeLoad.immS32;
- uint8 mode = imlInstruction->op_storeLoad.mode;
-
- if (mode == PPCREC_FPR_LD_MODE_SINGLE)
- {
- add_imm(TEMP_GPR1.WReg, realRegisterMem, adrOffset, TEMP_GPR1.WReg);
- if (indexed)
- add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, indexReg);
- ldr(TEMP_GPR2.WReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW));
- rev(TEMP_GPR2.WReg, TEMP_GPR2.WReg);
- fmov(dataSReg, TEMP_GPR2.WReg);
-
- if (imlInstruction->op_storeLoad.flags2.notExpanded)
- {
- // leave value as single
- }
- else
- {
- fcvt(dataDReg, dataSReg);
- }
- }
- else if (mode == PPCREC_FPR_LD_MODE_DOUBLE)
- {
- add_imm(TEMP_GPR1.WReg, realRegisterMem, adrOffset, TEMP_GPR1.WReg);
- if (indexed)
- add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, indexReg);
- ldr(TEMP_GPR2.XReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW));
- rev(TEMP_GPR2.XReg, TEMP_GPR2.XReg);
- fmov(dataDReg, TEMP_GPR2.XReg);
- }
- else
- {
- return false;
- }
- return true;
-}
-
-// store to memory
-bool AArch64GenContext_t::fpr_store(IMLInstruction* imlInstruction, bool indexed)
-{
- const IMLReg& dataImlReg = imlInstruction->op_storeLoad.registerData;
- DReg dataDReg = fpReg(dataImlReg);
- SReg dataSReg = fpReg(dataImlReg);
- WReg memReg = gpReg(imlInstruction->op_storeLoad.registerMem);
- WReg indexReg = indexed ? gpReg(imlInstruction->op_storeLoad.registerMem2) : wzr;
- sint32 memOffset = imlInstruction->op_storeLoad.immS32;
- uint8 mode = imlInstruction->op_storeLoad.mode;
-
- if (mode == PPCREC_FPR_ST_MODE_SINGLE)
- {
- add_imm(TEMP_GPR1.WReg, memReg, memOffset, TEMP_GPR1.WReg);
- if (indexed)
- add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, indexReg);
-
- if (imlInstruction->op_storeLoad.flags2.notExpanded)
- {
- // value is already in single format
- fmov(TEMP_GPR2.WReg, dataSReg);
- }
- else
- {
- fcvt(TEMP_FPR.SReg, dataDReg);
- fmov(TEMP_GPR2.WReg, TEMP_FPR.SReg);
- }
- rev(TEMP_GPR2.WReg, TEMP_GPR2.WReg);
- str(TEMP_GPR2.WReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW));
- }
- else if (mode == PPCREC_FPR_ST_MODE_DOUBLE)
- {
- add_imm(TEMP_GPR1.WReg, memReg, memOffset, TEMP_GPR1.WReg);
- if (indexed)
- add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, indexReg);
- fmov(TEMP_GPR2.XReg, dataDReg);
- rev(TEMP_GPR2.XReg, TEMP_GPR2.XReg);
- str(TEMP_GPR2.XReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW));
- }
- else if (mode == PPCREC_FPR_ST_MODE_UI32_FROM_PS0)
- {
- add_imm(TEMP_GPR1.WReg, memReg, memOffset, TEMP_GPR1.WReg);
- if (indexed)
- add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, indexReg);
- fmov(TEMP_GPR2.WReg, dataSReg);
- rev(TEMP_GPR2.WReg, TEMP_GPR2.WReg);
- str(TEMP_GPR2.WReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW));
- }
- else
- {
- cemu_assert_suspicious();
- cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_fpr_store(): Unsupported mode %d\n", mode);
- return false;
- }
- return true;
-}
-
-// FPR op FPR
-void AArch64GenContext_t::fpr_r_r(IMLInstruction* imlInstruction)
-{
- auto imlRegR = imlInstruction->op_fpr_r_r.regR;
- auto imlRegA = imlInstruction->op_fpr_r_r.regA;
-
- if (imlInstruction->operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT)
- {
- fcvtzs(gpReg(imlRegR), fpReg(imlRegA));
- return;
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT)
- {
- scvtf(fpReg(imlRegR), gpReg(imlRegA));
- return;
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
- {
- cemu_assert_debug(imlRegR.GetRegFormat() == IMLRegFormat::F64); // assuming target is always F64 for now
- // exact operation depends on size of types. Floats are automatically promoted to double if the target is F64
- DReg regFprDReg = fpReg(imlRegR);
- SReg regFprSReg = fpReg(imlRegR);
- if (imlRegA.GetRegFormat() == IMLRegFormat::I32)
- {
- fmov(regFprSReg, gpReg(imlRegA));
- // float to double
- fcvt(regFprDReg, regFprSReg);
- }
- else if (imlRegA.GetRegFormat() == IMLRegFormat::I64)
- {
- fmov(regFprDReg, gpReg(imlRegA));
- }
- else
- {
- cemu_assert_unimplemented();
- }
- return;
- }
-
- DReg regR = fpReg(imlRegR);
- DReg regA = fpReg(imlRegA);
-
- if (imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN)
- {
- fmov(regR, regA);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY)
- {
- fmul(regR, regR, regA);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE)
- {
- fdiv(regR, regR, regA);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD)
- {
- fadd(regR, regR, regA);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_FPR_SUB)
- {
- fsub(regR, regR, regA);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_FPR_FCTIWZ)
- {
- fcvtzs(regR, regA);
- }
- else
- {
- cemu_assert_suspicious();
- }
-}
-
-void AArch64GenContext_t::fpr_r_r_r(IMLInstruction* imlInstruction)
-{
- DReg regR = fpReg(imlInstruction->op_fpr_r_r_r.regR);
- DReg regA = fpReg(imlInstruction->op_fpr_r_r_r.regA);
- DReg regB = fpReg(imlInstruction->op_fpr_r_r_r.regB);
-
- if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY)
- {
- fmul(regR, regA, regB);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD)
- {
- fadd(regR, regA, regB);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_FPR_SUB)
- {
- fsub(regR, regA, regB);
- }
- else
- {
- cemu_assert_suspicious();
- }
-}
-
-/*
- * FPR = op (fprA, fprB, fprC)
- */
-void AArch64GenContext_t::fpr_r_r_r_r(IMLInstruction* imlInstruction)
-{
- DReg regR = fpReg(imlInstruction->op_fpr_r_r_r_r.regR);
- DReg regA = fpReg(imlInstruction->op_fpr_r_r_r_r.regA);
- DReg regB = fpReg(imlInstruction->op_fpr_r_r_r_r.regB);
- DReg regC = fpReg(imlInstruction->op_fpr_r_r_r_r.regC);
-
- if (imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT)
- {
- fcmp(regA, 0.0);
- fcsel(regR, regC, regB, Cond::GE);
- }
- else
- {
- cemu_assert_suspicious();
- }
-}
-
-void AArch64GenContext_t::fpr_r(IMLInstruction* imlInstruction)
-{
- DReg regRDReg = fpReg(imlInstruction->op_fpr_r.regR);
- SReg regRSReg = fpReg(imlInstruction->op_fpr_r.regR);
-
- if (imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE)
- {
- fneg(regRDReg, regRDReg);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_FPR_LOAD_ONE)
- {
- fmov(regRDReg, 1.0);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ABS)
- {
- fabs(regRDReg, regRDReg);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS)
- {
- fabs(regRDReg, regRDReg);
- fneg(regRDReg, regRDReg);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM)
- {
- // convert to 32bit single
- fcvt(regRSReg, regRDReg);
- // convert back to 64bit double
- fcvt(regRDReg, regRSReg);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64)
- {
- // convert bottom to 64bit double
- fcvt(regRDReg, regRSReg);
- }
- else
- {
- cemu_assert_unimplemented();
- }
-}
-
-Cond ImlFPCondToArm64Cond(IMLCondition cond)
-{
- switch (cond)
- {
- case IMLCondition::UNORDERED_GT:
- return Cond::GT;
- case IMLCondition::UNORDERED_LT:
- return Cond::MI;
- case IMLCondition::UNORDERED_EQ:
- return Cond::EQ;
- case IMLCondition::UNORDERED_U:
- return Cond::VS;
- default:
- {
- cemu_assert_suspicious();
- return Cond::EQ;
- }
- }
-}
-
-void AArch64GenContext_t::fpr_compare(IMLInstruction* imlInstruction)
-{
- WReg regR = gpReg(imlInstruction->op_fpr_compare.regR);
- DReg regA = fpReg(imlInstruction->op_fpr_compare.regA);
- DReg regB = fpReg(imlInstruction->op_fpr_compare.regB);
- auto cond = ImlFPCondToArm64Cond(imlInstruction->op_fpr_compare.cond);
- fcmp(regA, regB);
- cset(regR, cond);
-}
-
-void AArch64GenContext_t::call_imm(IMLInstruction* imlInstruction)
-{
- str(x30, AdrPreImm(sp, -16));
- mov(TEMP_GPR1.XReg, imlInstruction->op_call_imm.callAddress);
- blr(TEMP_GPR1.XReg);
- ldr(x30, AdrPostImm(sp, 16));
-}
-
-bool PPCRecompiler_generateAArch64Code(struct PPCRecFunction_t* PPCRecFunction, struct ppcImlGenContext_t* ppcImlGenContext)
-{
- AArch64Allocator allocator;
- AArch64GenContext_t aarch64GenContext{&allocator};
-
- // generate iml instruction code
- bool codeGenerationFailed = false;
- for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
- {
- if (codeGenerationFailed)
- break;
- segIt->x64Offset = aarch64GenContext.getSize();
-
- aarch64GenContext.storeSegmentStart(segIt);
-
- for (size_t i = 0; i < segIt->imlList.size(); i++)
- {
- IMLInstruction* imlInstruction = segIt->imlList.data() + i;
- if (imlInstruction->type == PPCREC_IML_TYPE_R_NAME)
- {
- aarch64GenContext.r_name(imlInstruction);
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_NAME_R)
- {
- aarch64GenContext.name_r(imlInstruction);
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_R_R)
- {
- if (!aarch64GenContext.r_r(imlInstruction))
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32)
- {
- if (!aarch64GenContext.r_s32(imlInstruction))
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32)
- {
- if (!aarch64GenContext.r_r_s32(imlInstruction))
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32_CARRY)
- {
- if (!aarch64GenContext.r_r_s32_carry(imlInstruction))
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R)
- {
- if (!aarch64GenContext.r_r_r(imlInstruction))
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R_CARRY)
- {
- if (!aarch64GenContext.r_r_r_carry(imlInstruction))
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE)
- {
- aarch64GenContext.compare(imlInstruction);
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32)
- {
- aarch64GenContext.compare_s32(imlInstruction);
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
- {
- aarch64GenContext.cjump(imlInstruction, segIt);
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_JUMP)
- {
- aarch64GenContext.jump(segIt);
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
- {
- aarch64GenContext.conditionalJumpCycleCheck(segIt);
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_MACRO)
- {
- if (!aarch64GenContext.macro(imlInstruction))
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_LOAD)
- {
- if (!aarch64GenContext.load(imlInstruction, false))
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_LOAD_INDEXED)
- {
- if (!aarch64GenContext.load(imlInstruction, true))
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_STORE)
- {
- if (!aarch64GenContext.store(imlInstruction, false))
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED)
- {
- if (!aarch64GenContext.store(imlInstruction, true))
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
- {
- aarch64GenContext.atomic_cmp_store(imlInstruction);
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_CALL_IMM)
- {
- aarch64GenContext.call_imm(imlInstruction);
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_NO_OP)
- {
- // no op
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD)
- {
- if (!aarch64GenContext.fpr_load(imlInstruction, false))
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
- {
- if (!aarch64GenContext.fpr_load(imlInstruction, true))
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE)
- {
- if (!aarch64GenContext.fpr_store(imlInstruction, false))
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
- {
- if (!aarch64GenContext.fpr_store(imlInstruction, true))
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R)
- {
- aarch64GenContext.fpr_r_r(imlInstruction);
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R)
- {
- aarch64GenContext.fpr_r_r_r(imlInstruction);
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R)
- {
- aarch64GenContext.fpr_r_r_r_r(imlInstruction);
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R)
- {
- aarch64GenContext.fpr_r(imlInstruction);
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_COMPARE)
- {
- aarch64GenContext.fpr_compare(imlInstruction);
- }
- else
- {
- codeGenerationFailed = true;
- cemu_assert_suspicious();
- cemuLog_log(LogType::Recompiler, "PPCRecompiler_generateAArch64Code(): Unsupported iml type {}", imlInstruction->type);
- }
- }
- }
-
- // handle failed code generation
- if (codeGenerationFailed)
- {
- return false;
- }
-
- if (!aarch64GenContext.processAllJumps())
- {
- cemuLog_log(LogType::Recompiler, "PPCRecompiler_generateAArch64Code(): some jumps exceeded the +/-128MB offset.");
- return false;
- }
-
- aarch64GenContext.readyRE();
-
- // set code
- PPCRecFunction->x86Code = aarch64GenContext.getCode();
- PPCRecFunction->x86Size = aarch64GenContext.getMaxSize();
- // set free disabled to skip freeing the code from the CodeGenerator destructor
- allocator.setFreeDisabled(true);
- return true;
-}
-
-void PPCRecompiler_cleanupAArch64Code(void* code, size_t size)
-{
- AArch64Allocator allocator;
- if (allocator.useProtect())
- CodeArray::protect(code, size, CodeArray::PROTECT_RW);
- allocator.free(static_cast(code));
-}
-
-void AArch64GenContext_t::enterRecompilerCode()
-{
- constexpr size_t STACK_SIZE = 160 /* x19 .. x30 + v8.d[0] .. v15.d[0] */;
- static_assert(STACK_SIZE % 16 == 0);
- sub(sp, sp, STACK_SIZE);
- mov(x9, sp);
-
- stp(x19, x20, AdrPostImm(x9, 16));
- stp(x21, x22, AdrPostImm(x9, 16));
- stp(x23, x24, AdrPostImm(x9, 16));
- stp(x25, x26, AdrPostImm(x9, 16));
- stp(x27, x28, AdrPostImm(x9, 16));
- stp(x29, x30, AdrPostImm(x9, 16));
- st4((v8.d - v11.d)[0], AdrPostImm(x9, 32));
- st4((v12.d - v15.d)[0], AdrPostImm(x9, 32));
- mov(HCPU_REG, x1); // call argument 2
- mov(PPC_REC_INSTANCE_REG, (uint64)ppcRecompilerInstanceData);
- mov(MEM_BASE_REG, (uint64)memory_base);
-
- // branch to recFunc
- blr(x0); // call argument 1
-
- mov(x9, sp);
- ldp(x19, x20, AdrPostImm(x9, 16));
- ldp(x21, x22, AdrPostImm(x9, 16));
- ldp(x23, x24, AdrPostImm(x9, 16));
- ldp(x25, x26, AdrPostImm(x9, 16));
- ldp(x27, x28, AdrPostImm(x9, 16));
- ldp(x29, x30, AdrPostImm(x9, 16));
- ld4((v8.d - v11.d)[0], AdrPostImm(x9, 32));
- ld4((v12.d - v15.d)[0], AdrPostImm(x9, 32));
-
- add(sp, sp, STACK_SIZE);
-
- ret();
-}
-
-void AArch64GenContext_t::leaveRecompilerCode()
-{
- str(LR.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, instructionPointer)));
- ret();
-}
-
-bool initializedInterfaceFunctions = false;
-AArch64GenContext_t enterRecompilerCode_ctx{};
-
-AArch64GenContext_t leaveRecompilerCode_unvisited_ctx{};
-AArch64GenContext_t leaveRecompilerCode_visited_ctx{};
-void PPCRecompilerAArch64Gen_generateRecompilerInterfaceFunctions()
-{
- if (initializedInterfaceFunctions)
- return;
- initializedInterfaceFunctions = true;
-
- enterRecompilerCode_ctx.enterRecompilerCode();
- enterRecompilerCode_ctx.readyRE();
- PPCRecompiler_enterRecompilerCode = enterRecompilerCode_ctx.getCode();
-
- leaveRecompilerCode_unvisited_ctx.leaveRecompilerCode();
- leaveRecompilerCode_unvisited_ctx.readyRE();
- PPCRecompiler_leaveRecompilerCode_unvisited = leaveRecompilerCode_unvisited_ctx.getCode();
-
- leaveRecompilerCode_visited_ctx.leaveRecompilerCode();
- leaveRecompilerCode_visited_ctx.readyRE();
- PPCRecompiler_leaveRecompilerCode_visited = leaveRecompilerCode_visited_ctx.getCode();
-}
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h b/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h
deleted file mode 100644
index b610ee04..00000000
--- a/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#pragma once
-
-#include "HW/Espresso/Recompiler/IML/IMLInstruction.h"
-#include "../PPCRecompiler.h"
-
-bool PPCRecompiler_generateAArch64Code(struct PPCRecFunction_t* PPCRecFunction, struct ppcImlGenContext_t* ppcImlGenContext);
-void PPCRecompiler_cleanupAArch64Code(void* code, size_t size);
-
-void PPCRecompilerAArch64Gen_generateRecompilerInterfaceFunctions();
-
-// architecture specific constants
-namespace IMLArchAArch64
-{
- static constexpr int PHYSREG_GPR_BASE = 0;
- static constexpr int PHYSREG_GPR_COUNT = 25;
- static constexpr int PHYSREG_FPR_BASE = PHYSREG_GPR_COUNT;
- static constexpr int PHYSREG_FPR_COUNT = 31;
-}; // namespace IMLArchAArch64
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
deleted file mode 100644
index eadb80fb..00000000
--- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
+++ /dev/null
@@ -1,1672 +0,0 @@
-#include "Cafe/HW/Espresso/PPCState.h"
-#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
-#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterHelper.h"
-#include "../PPCRecompiler.h"
-#include "../PPCRecompilerIml.h"
-#include "BackendX64.h"
-#include "Cafe/OS/libs/coreinit/coreinit_Time.h"
-#include "util/MemMapper/MemMapper.h"
-#include "Common/cpu_features.h"
-#include
-
-static x86Assembler64::GPR32 _reg32(IMLReg physReg)
-{
- cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I32);
- IMLRegID regId = physReg.GetRegID();
- cemu_assert_debug(regId < 16);
- return (x86Assembler64::GPR32)regId;
-}
-
-static uint32 _reg64(IMLReg physReg)
-{
- cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I64);
- IMLRegID regId = physReg.GetRegID();
- cemu_assert_debug(regId < 16);
- return regId;
-}
-
-uint32 _regF64(IMLReg physReg)
-{
- cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::F64);
- IMLRegID regId = physReg.GetRegID();
- cemu_assert_debug(regId >= IMLArchX86::PHYSREG_FPR_BASE && regId < IMLArchX86::PHYSREG_FPR_BASE+16);
- regId -= IMLArchX86::PHYSREG_FPR_BASE;
- return regId;
-}
-
-static x86Assembler64::GPR8_REX _reg8(IMLReg physReg)
-{
- cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I32); // for now these are represented as 32bit
- return (x86Assembler64::GPR8_REX)physReg.GetRegID();
-}
-
-static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId)
-{
- return (x86Assembler64::GPR32)regId;
-}
-
-static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId)
-{
- return (x86Assembler64::GPR8_REX)regId;
-}
-
-static x86Assembler64::GPR8_REX _reg8_from_reg64(uint32 regId)
-{
- return (x86Assembler64::GPR8_REX)regId;
-}
-
-static x86Assembler64::GPR64 _reg64_from_reg32(x86Assembler64::GPR32 regId)
-{
- return (x86Assembler64::GPR64)regId;
-}
-
-X86Cond _x86Cond(IMLCondition imlCond)
-{
- switch (imlCond)
- {
- case IMLCondition::EQ:
- return X86_CONDITION_Z;
- case IMLCondition::NEQ:
- return X86_CONDITION_NZ;
- case IMLCondition::UNSIGNED_GT:
- return X86_CONDITION_NBE;
- case IMLCondition::UNSIGNED_LT:
- return X86_CONDITION_B;
- case IMLCondition::SIGNED_GT:
- return X86_CONDITION_NLE;
- case IMLCondition::SIGNED_LT:
- return X86_CONDITION_L;
- default:
- break;
- }
- cemu_assert_suspicious();
- return X86_CONDITION_Z;
-}
-
-X86Cond _x86CondInverted(IMLCondition imlCond)
-{
- switch (imlCond)
- {
- case IMLCondition::EQ:
- return X86_CONDITION_NZ;
- case IMLCondition::NEQ:
- return X86_CONDITION_Z;
- case IMLCondition::UNSIGNED_GT:
- return X86_CONDITION_BE;
- case IMLCondition::UNSIGNED_LT:
- return X86_CONDITION_NB;
- case IMLCondition::SIGNED_GT:
- return X86_CONDITION_LE;
- case IMLCondition::SIGNED_LT:
- return X86_CONDITION_NL;
- default:
- break;
- }
- cemu_assert_suspicious();
- return X86_CONDITION_Z;
-}
-
-X86Cond _x86Cond(IMLCondition imlCond, bool condIsInverted)
-{
- if (condIsInverted)
- return _x86CondInverted(imlCond);
- return _x86Cond(imlCond);
-}
-
-/*
-* Remember current instruction output offset for reloc
-* The instruction generated after this method has been called will be adjusted
-*/
-void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContext, void* extraInfo = nullptr)
-{
- x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->emitter->GetWriteIndex(), extraInfo);
-}
-
-void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset)
-{
- uint8* instructionData = x64GenContext->emitter->GetBufferPtr() + jumpInstructionOffset;
- if (instructionData[0] == 0x0F && (instructionData[1] >= 0x80 && instructionData[1] <= 0x8F))
- {
- // far conditional jump
- *(uint32*)(instructionData + 2) = (destinationOffset - (jumpInstructionOffset + 6));
- }
- else if (instructionData[0] >= 0x70 && instructionData[0] <= 0x7F)
- {
- // short conditional jump
- sint32 distance = (sint32)((destinationOffset - (jumpInstructionOffset + 2)));
- cemu_assert_debug(distance >= -128 && distance <= 127);
- *(uint8*)(instructionData + 1) = (uint8)distance;
- }
- else if (instructionData[0] == 0xE9)
- {
- *(uint32*)(instructionData + 1) = (destinationOffset - (jumpInstructionOffset + 5));
- }
- else if (instructionData[0] == 0xEB)
- {
- sint32 distance = (sint32)((destinationOffset - (jumpInstructionOffset + 2)));
- cemu_assert_debug(distance >= -128 && distance <= 127);
- *(uint8*)(instructionData + 1) = (uint8)distance;
- }
- else
- {
- assert_dbg();
- }
-}
-
-void* ATTR_MS_ABI PPCRecompiler_virtualHLE(PPCInterpreter_t* hCPU, uint32 hleFuncId)
-{
- void* prevRSPTemp = hCPU->rspTemp;
- if( hleFuncId == 0xFFD0 )
- {
- hCPU->remainingCycles -= 500; // let subtract about 500 cycles for each HLE call
- hCPU->gpr[3] = 0;
- PPCInterpreter_nextInstruction(hCPU);
- return hCPU;
- }
- else
- {
- auto hleCall = PPCInterpreter_getHLECall(hleFuncId);
- cemu_assert(hleCall != nullptr);
- hleCall(hCPU);
- }
- hCPU->rspTemp = prevRSPTemp;
- return PPCInterpreter_getCurrentInstance();
-}
-
-// Emits JMP [R15 + RDX*2 + offsetof(ppcRecompilerDirectJumpTable)]
-// (indirect jump through the direct jump table, indexed by the PPC address held in RDX).
-static void PPCRecompilerX64Gen_emitJumpTableJmpIndexedRDX(x64GenContext_t* x64GenContext)
-{
- x64Gen_writeU8(x64GenContext, 0x41); // REX.B
- x64Gen_writeU8(x64GenContext, 0xFF); // opcode group containing JMP r/m64
- x64Gen_writeU8(x64GenContext, 0xA4); // ModRM: /4, SIB byte follows, disp32
- x64Gen_writeU8(x64GenContext, 0x57); // SIB: scale 2, index RDX, base R15
- x64Gen_writeU32(x64GenContext, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
-}
-
-// Emits JMP [R15 + displacement] with a 32-bit displacement.
-static void PPCRecompilerX64Gen_emitJumpTableJmpDisp32(x64GenContext_t* x64GenContext, uint32 displacement)
-{
- x64Gen_writeU8(x64GenContext, 0x41); // REX.B
- x64Gen_writeU8(x64GenContext, 0xFF); // opcode group containing JMP r/m64
- x64Gen_writeU8(x64GenContext, 0xA7); // ModRM: /4, base R15, disp32
- x64Gen_writeU32(x64GenContext, displacement);
-}
-
-// Loads the constant PPC address newIP into RDX and jumps through the direct jump table entry for it.
-static void PPCRecompilerX64Gen_emitJumpToConstIP(x64GenContext_t* x64GenContext, uint32 newIP)
-{
- // remember new instruction pointer in RDX
- x64Gen_mov_reg64Low32_imm32(x64GenContext, X86_REG_RDX, newIP);
- // since RDX is constant we can use JMP [R15+const_offset] if jumpTableOffset+RDX*2 does not exceed the 2GB boundary
- uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL;
- if (lookupOffset >= 0x80000000ULL)
- PPCRecompilerX64Gen_emitJumpTableJmpIndexedRDX(x64GenContext);
- else
- PPCRecompilerX64Gen_emitJumpTableJmpDisp32(x64GenContext, (uint32)lookupOffset);
-}
-
-// Emits x64 code for an IML macro instruction (branches, leave, debug break, cycle counting, HLE call).
-// Returns true if code was emitted, false (after a debug assert) for an unknown macro operation.
-bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
-{
- if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG)
- {
- uint32 branchDstReg = _reg32(imlInstruction->op_macro.paramReg);
- if(X86_REG_RDX != branchDstReg)
- x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RDX, branchDstReg);
- // potential optimization: Use branchDstReg directly if possible instead of moving to RDX/EDX
- PPCRecompilerX64Gen_emitJumpTableJmpIndexedRDX(x64GenContext);
- return true;
- }
- else if( imlInstruction->operation == PPCREC_IML_MACRO_BL )
- {
- // MOV DWORD [SPR_LinkRegister], newLR
- uint32 newLR = imlInstruction->op_macro.param + 4;
- x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.LR), newLR);
- PPCRecompilerX64Gen_emitJumpToConstIP(x64GenContext, imlInstruction->op_macro.param2);
- return true;
- }
- else if( imlInstruction->operation == PPCREC_IML_MACRO_B_FAR )
- {
- PPCRecompilerX64Gen_emitJumpToConstIP(x64GenContext, imlInstruction->op_macro.param2);
- return true;
- }
- else if( imlInstruction->operation == PPCREC_IML_MACRO_LEAVE )
- {
- uint32 currentInstructionAddress = imlInstruction->op_macro.param;
- // remember PC value in REG_EDX
- x64Gen_mov_reg64Low32_imm32(x64GenContext, X86_REG_RDX, currentInstructionAddress);
- // table entry 0 is the special slot used for leaving the recompiler, so the displacement
- // is just the table offset (offsetof replaces the previous null-pointer member-address idiom)
- // JMP [R15+offset]
- PPCRecompilerX64Gen_emitJumpTableJmpDisp32(x64GenContext, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
- return true;
- }
- else if( imlInstruction->operation == PPCREC_IML_MACRO_DEBUGBREAK )
- {
- x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, imlInstruction->op_macro.param2);
- x64Gen_int3(x64GenContext);
- return true;
- }
- else if( imlInstruction->operation == PPCREC_IML_MACRO_COUNT_CYCLES )
- {
- uint32 cycleCount = imlInstruction->op_macro.param;
- x64Gen_sub_mem32reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, remainingCycles), cycleCount);
- return true;
- }
- else if( imlInstruction->operation == PPCREC_IML_MACRO_HLE )
- {
- uint32 ppcAddress = imlInstruction->op_macro.param;
- uint32 funcId = imlInstruction->op_macro.param2;
- // update instruction pointer
- x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer), ppcAddress);
- // set parameters
- x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RCX, REG_RESV_HCPU);
- x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RDX, funcId);
- // restore stackpointer from hCPU->rspTemp
- x64Emit_mov_reg64_mem64(x64GenContext, X86_REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp));
- // reserve space on stack for call parameters
- x64Gen_sub_reg64_imm32(x64GenContext, X86_REG_RSP, 8*11); // must be uneven number in order to retain stack 0x10 alignment
- x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RBP, 0);
- // call HLE function
- x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RAX, (uint64)PPCRecompiler_virtualHLE);
- x64Gen_call_reg64(x64GenContext, X86_REG_RAX);
- // reload the hCPU register from RAX (result of PPCRecompiler_virtualHLE)
- x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_HCPU, X86_REG_RAX);
- // MOV R15, ppcRecompilerInstanceData
- x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_RECDATA, (uint64)ppcRecompilerInstanceData);
- // MOV R13, memory_base
- x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_MEMBASE, (uint64)memory_base);
- // check if cycles were decreased beyond zero, if yes -> leave recompiler
- x64Gen_bt_mem8(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative
- sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
- x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_CARRY, 0);
-
- x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_RDX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer));
- // set EAX to 0 (we assume that ppcRecompilerDirectJumpTable[0] will be a recompiler escape function)
- x64Gen_xor_reg32_reg32(x64GenContext, X86_REG_RAX, X86_REG_RAX);
- // ADD RAX, REG_RESV_RECDATA
- x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, REG_RESV_RECDATA);
- // JMP [recompilerCallTable+EAX/4*8]
- x64Gen_jmp_memReg64(x64GenContext, X86_REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
- // target of the "cycles not negative" branch above
- PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
- // check if instruction pointer was changed
- // assign new instruction pointer to EAX
- x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_RAX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer));
- // remember instruction pointer in REG_EDX
- x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RDX, X86_REG_RAX);
- // EAX *= 2
- x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, X86_REG_RAX);
- // ADD RAX, REG_RESV_RECDATA
- x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, REG_RESV_RECDATA);
- // JMP [ppcRecompilerDirectJumpTable+RAX/4*8]
- x64Gen_jmp_memReg64(x64GenContext, X86_REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
- return true;
- }
- else
- {
- debug_printf("Unknown recompiler macro operation %d\n", imlInstruction->operation);
- assert_dbg();
- }
- return false;
-}
-
-/*
-* Load from memory
-*
-* Emits x64 code for an IML load of 8, 16 or 32 bits into op_storeLoad.registerData.
-* The address is formed from REG_RESV_MEMBASE, registerMem, immS32 and, when `indexed`
-* is set, additionally registerMem2.
-* Honors flags2.swapEndian (16/32 bit) and flags2.signExtend (8/16 bit).
-* Returns false if the copy width is unsupported or if an indexed access uses the same
-* register for both address operands; returns true once code has been emitted.
-*/
-bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
-{
- // all involved registers must be 32-bit integer registers
- cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32);
- cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32);
- if (indexed)
- cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32);
-
- IMLRegID realRegisterData = imlInstruction->op_storeLoad.registerData.GetRegID();
- IMLRegID realRegisterMem = imlInstruction->op_storeLoad.registerMem.GetRegID();
- IMLRegID realRegisterMem2 = PPC_REC_INVALID_REGISTER;
- if( indexed )
- realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2.GetRegID();
- // indexed access with identical address registers is not handled by this backend
- if( indexed && realRegisterMem == realRegisterMem2 )
- {
- return false;
- }
- if( indexed && realRegisterData == realRegisterMem2 )
- {
- // for indexed memory access realRegisterData must not be the same register as the second memory register,
- // this can easily be worked around by swapping realRegisterMem and realRegisterMem2
- std::swap(realRegisterMem, realRegisterMem2);
- }
-
- bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend;
- bool switchEndian = imlInstruction->op_storeLoad.flags2.swapEndian;
- if( imlInstruction->op_storeLoad.copyWidth == 32 )
- {
- // 32-bit path: the combined index is built in REG_RESV_TEMP, so the address registers stay unmodified
- if (indexed)
- {
- x64Gen_lea_reg64Low32_reg64Low32PlusReg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem, realRegisterMem2);
- }
- if( g_CPUFeatures.x86.movbe && switchEndian )
- {
- // MOVBE available: load and byte-swap in one instruction
- if (indexed)
- {
- x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
- }
- else
- {
- x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
- }
- }
- else
- {
- // plain load, followed by an explicit BSWAP when an endian swap is requested
- if (indexed)
- {
- x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
- if (switchEndian)
- x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData);
- }
- else
- {
- x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
- if (switchEndian)
- x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData);
- }
- }
- }
- else if( imlInstruction->op_storeLoad.copyWidth == 16 )
- {
- // 16-bit path: for indexed access the second address register is temporarily added
- // to the first one and subtracted again after the load
- if (indexed)
- {
- x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
- }
- if(g_CPUFeatures.x86.movbe && switchEndian )
- {
- x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
- // no need to undo the add if the address register was just overwritten by the loaded value
- if( indexed && realRegisterMem != realRegisterData )
- x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
- }
- else
- {
- x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
- if( indexed && realRegisterMem != realRegisterData )
- x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
- // rotating the low 16 bits by 8 swaps the two bytes
- if( switchEndian )
- x64Gen_rol_reg64Low16_imm8(x64GenContext, realRegisterData, 8);
- }
- // widen the 16-bit value to 32 bits
- if( signExtend )
- x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, realRegisterData, realRegisterData);
- else
- x64Gen_movZeroExtend_reg64Low32_reg64Low16(x64GenContext, realRegisterData, realRegisterData);
- }
- else if( imlInstruction->op_storeLoad.copyWidth == 8 )
- {
- // 8-bit path: same temporary add/sub scheme as the 16-bit path; no endian handling needed
- if( indexed )
- x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
- if( signExtend )
- x64Gen_movSignExtend_reg64Low32_mem8Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
- else
- x64Emit_movZX_reg32_mem8(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
- if( indexed && realRegisterMem != realRegisterData )
- x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
- }
- else
- return false; // unsupported copy width
- return true;
-}
-
-/*
-* Write to memory
-*
-* Emits x64 code for an IML store of the low 8, 16 or 32 bits of op_storeLoad.registerData.
-* The address is formed from REG_RESV_MEMBASE, registerMem, immS32 and, when `indexed`
-* is set, additionally registerMem2 (temporarily added to registerMem and subtracted again
-* after the store). Honors flags2.swapEndian for 16 and 32 bit stores.
-* Returns false if the copy width is unsupported or if an indexed access uses the same
-* register for both address operands; returns true once code has been emitted.
-*/
-bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
-{
- // all involved registers must be 32-bit integer registers
- cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32);
- cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32);
- if (indexed)
- cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32);
-
- IMLRegID realRegisterData = imlInstruction->op_storeLoad.registerData.GetRegID();
- IMLRegID realRegisterMem = imlInstruction->op_storeLoad.registerMem.GetRegID();
- IMLRegID realRegisterMem2 = PPC_REC_INVALID_REGISTER;
- if (indexed)
- realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2.GetRegID();
-
- // indexed access with identical address registers is not handled by this backend
- if (indexed && realRegisterMem == realRegisterMem2)
- {
- return false;
- }
- if (indexed && realRegisterData == realRegisterMem2)
- {
- // for indexed memory access realRegisterData must not be the same register as the second memory register,
- // this can easily be worked around by swapping realRegisterMem and realRegisterMem2
- std::swap(realRegisterMem, realRegisterMem2);
- }
-
- // note: flags2.signExtend is irrelevant for stores and therefore not read here
- bool swapEndian = imlInstruction->op_storeLoad.flags2.swapEndian;
- if (imlInstruction->op_storeLoad.copyWidth == 32)
- {
- // the data register can be stored directly unless it has to be byte-swapped in place
- // (no MOVBE) or it doubles as the address register; in those cases work on a copy in REG_RESV_TEMP
- uint32 valueRegister;
- if ((swapEndian == false || g_CPUFeatures.x86.movbe) && realRegisterMem != realRegisterData)
- {
- valueRegister = realRegisterData;
- }
- else
- {
- x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
- valueRegister = REG_RESV_TEMP;
- }
- if (!g_CPUFeatures.x86.movbe && swapEndian)
- x64Gen_bswap_reg64Lower32bit(x64GenContext, valueRegister);
- if (indexed)
- x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
- if (g_CPUFeatures.x86.movbe && swapEndian)
- x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister);
- else
- x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister);
- // undo the temporary address adjustment
- if (indexed)
- x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
- }
- else if (imlInstruction->op_storeLoad.copyWidth == 16)
- {
- // always work on a copy so the byte swap does not modify the source register
- x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
- if (swapEndian)
- x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8);
- if (indexed)
- x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
- x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
- if (indexed)
- x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
- // todo: Optimize this, e.g. by using MOVBE
- }
- else if (imlInstruction->op_storeLoad.copyWidth == 8)
- {
- // if the data register is also the address register, the temporary add below would
- // corrupt the value, so store from a copy instead
- if (indexed && realRegisterMem == realRegisterData)
- {
- x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
- realRegisterData = REG_RESV_TEMP;
- }
- if (indexed)
- x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
- x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, realRegisterData);
- if (indexed)
- x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
- }
- else
- return false; // unsupported copy width
- return true;
-}
-
-// Emits a LOCK CMPXCHG based compare-and-store: the compare value is placed in EAX, the
-// write value is conditionally stored to memory addressed via REG_RESV_MEMBASE and the
-// EA register, and the bool output register receives 1 if the zero flag was set, else 0.
-void PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
-{
- auto& emitter = x64GenContext->emitter;
- const auto& op = imlInstruction->op_atomic_compare_store;
-
- auto resultReg = _reg32_from_reg8(_reg8(op.regBoolOut));
- auto addressReg = _reg32(op.regEA);
- auto newValueReg = _reg32(op.regWriteValue);
- auto expectedReg = _reg32(op.regCompareValue);
-
- // CMPXCHG implicitly uses EAX, so only the result may live there
- cemu_assert_debug(resultReg == X86_REG_EAX);
- cemu_assert_debug(addressReg != X86_REG_EAX);
- cemu_assert_debug(newValueReg != X86_REG_EAX);
- cemu_assert_debug(expectedReg != X86_REG_EAX);
-
- emitter->MOV_dd(X86_REG_EAX, expectedReg);
- emitter->LockPrefix();
- emitter->CMPXCHG_dd_l(REG_RESV_MEMBASE, 0, _reg64_from_reg32(addressReg), 1, newValueReg);
- emitter->SETcc_b(X86Cond::X86_CONDITION_Z, resultReg);
- // SETcc doesn't clear the upper bits so we do it manually here
- emitter->AND_di32(resultReg, 1);
-}
-
-// Emits a call to the absolute address stored in op_call_imm.callAddress.
-// Spilling of volatile registers and placement of parameters is left to the register
-// allocator, so no special handling happens here.
-void PPCRecompilerX64Gen_imlInstruction_call_imm(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
-{
- auto& emitter = x64GenContext->emitter;
- // reserve enough space for any parameters while keeping stack alignment of 16 intact
- emitter->SUB_qi8(X86_REG_RSP, 0x20);
- // load the target into RAX and call through it
- emitter->MOV_qi64(X86_REG_RAX, imlInstruction->op_call_imm.callAddress);
- emitter->CALL_q(X86_REG_RAX);
- // release the reserved stack space again
- emitter->ADD_qi8(X86_REG_RSP, 0x20);
- // a note about the stack pointer:
- // currently the code generated by generateEnterRecompilerCode makes sure the stack is 16 byte aligned, so we don't need to fix it up here
-}
-
-// Emits x64 code for a two-register IML instruction (regR = op(regA), or CMP regR, regA).
-// Returns false, after logging, when the operation is not supported.
-bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
-{
- auto regR = _reg32(imlInstruction->op_r_r.regR);
- auto regA = _reg32(imlInstruction->op_r_r.regA);
- const bool sameRegister = (regR == regA);
-
- switch (imlInstruction->operation)
- {
- case PPCREC_IML_OP_ASSIGN:
- // registerResult = registerA
- if (!sameRegister)
- x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA);
- break;
- case PPCREC_IML_OP_ENDIAN_SWAP:
- if (!sameRegister)
- x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA); // if movbe is available we can move and swap in a single instruction?
- x64Gen_bswap_reg64Lower32bit(x64GenContext, regR);
- break;
- case PPCREC_IML_OP_ASSIGN_S8_TO_S32:
- x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, regR, regA);
- break;
- case PPCREC_IML_OP_ASSIGN_S16_TO_S32:
- x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, regR, reg32ToReg16(regA));
- break;
- case PPCREC_IML_OP_NOT:
- // bring the source into the result register first, then invert in place
- if (!sameRegister)
- x64Gen_mov_reg64_reg64(x64GenContext, regR, regA);
- x64Gen_not_reg64Low32(x64GenContext, regR);
- break;
- case PPCREC_IML_OP_NEG:
- // bring the source into the result register first, then negate in place
- if (!sameRegister)
- x64Gen_mov_reg64_reg64(x64GenContext, regR, regA);
- x64Gen_neg_reg64Low32(x64GenContext, regR);
- break;
- case PPCREC_IML_OP_CNTLZW:
- {
- // count leading zeros
- // LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5])
- if (g_CPUFeatures.x86.lzcnt)
- {
- x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, regR, regA);
- break;
- }
- // fallback without LZCNT: result is 32 for a zero input, otherwise 31 - BSR(input)
- x64Gen_test_reg64Low32_reg64Low32(x64GenContext, regA, regA);
- sint32 jumpToZeroCase = x64GenContext->emitter->GetWriteIndex();
- x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0);
- x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, regR, regA);
- x64Gen_neg_reg64Low32(x64GenContext, regR);
- x64Gen_add_reg64Low32_imm32(x64GenContext, regR, 32-1);
- sint32 jumpToEnd = x64GenContext->emitter->GetWriteIndex();
- x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
- PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpToZeroCase, x64GenContext->emitter->GetWriteIndex());
- x64Gen_mov_reg64Low32_imm32(x64GenContext, regR, 32);
- PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpToEnd, x64GenContext->emitter->GetWriteIndex());
- break;
- }
- case PPCREC_IML_OP_X86_CMP:
- x64GenContext->emitter->CMP_dd(regR, regA);
- break;
- default:
- cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation);
- return false;
- }
- return true;
-}
-
-// Emits x64 code for a register/immediate IML instruction (assign, rotate-left by constant, compare).
-// Returns false, after logging, when the operation is not supported.
-bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
-{
- auto regR = _reg32(imlInstruction->op_r_immS32.regR);
-
- switch (imlInstruction->operation)
- {
- case PPCREC_IML_OP_ASSIGN:
- x64Gen_mov_reg64Low32_imm32(x64GenContext, regR, (uint32)imlInstruction->op_r_immS32.immS32);
- break;
- case PPCREC_IML_OP_LEFT_ROTATE:
- // the rotate amount is emitted as an 8-bit immediate; bit 7 is expected to be clear
- cemu_assert_debug((imlInstruction->op_r_immS32.immS32 & 0x80) == 0);
- x64Gen_rol_reg64Low32_imm8(x64GenContext, regR, (uint8)imlInstruction->op_r_immS32.immS32);
- break;
- case PPCREC_IML_OP_X86_CMP:
- {
- sint32 compareValue = imlInstruction->op_r_immS32.immS32;
- x64GenContext->emitter->CMP_di32(regR, compareValue);
- break;
- }
- default:
- cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation);
- return false;
- }
- return true;
-}
-
-// Emits x64 code for a three-register IML instruction: regR = regA <op> regB.
-// Handles add/sub, or/and/xor, signed multiply, shifts, rotate, divide and multiply-high.
-// The individual branches take care of the cases where the result register aliases one
-// of the operands. Returns false, after logging, when the operation is not supported.
-bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
-{
- auto rRegResult = _reg32(imlInstruction->op_r_r_r.regR);
- auto rRegOperand1 = _reg32(imlInstruction->op_r_r_r.regA);
- auto rRegOperand2 = _reg32(imlInstruction->op_r_r_r.regB);
-
- if (imlInstruction->operation == PPCREC_IML_OP_ADD)
- {
- // registerResult = registerOperand1 + registerOperand2
- if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) )
- {
- // be careful not to overwrite the operand before we use it
- if( rRegResult == rRegOperand1 )
- x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
- else
- x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1);
- }
- else
- {
- // copy operand1 to destination register before doing addition
- x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
- x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
- }
- }
- else if( imlInstruction->operation == PPCREC_IML_OP_SUB )
- {
- if( rRegOperand1 == rRegOperand2 )
- {
- // result = operand1 - operand1 -> 0
- x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult);
- }
- else if( rRegResult == rRegOperand1 )
- {
- // result = result - operand2
- x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
- }
- else if ( rRegResult == rRegOperand2 )
- {
- // result = operand1 - result, computed as (-result) + operand1
- x64Gen_neg_reg64Low32(x64GenContext, rRegResult);
- x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1);
- }
- else
- {
- x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
- x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
- }
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR)
- {
- // the operations are commutative, so make sure an aliased result register ends up as operand1
- if (rRegResult == rRegOperand2)
- std::swap(rRegOperand1, rRegOperand2);
-
- if (rRegResult != rRegOperand1)
- x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
-
- if (imlInstruction->operation == PPCREC_IML_OP_OR)
- x64Gen_or_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
- else if (imlInstruction->operation == PPCREC_IML_OP_AND)
- x64Gen_and_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
- else
- x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
- }
- else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED )
- {
- // registerResult = registerOperand1 * registerOperand2
- if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) )
- {
- // be careful not to overwrite the operand before we use it
- if( rRegResult == rRegOperand1 )
- x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
- else
- x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1);
- }
- else
- {
- // copy operand1 to destination register before doing multiplication
- x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
- // multiply by operand2
- x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
- }
- }
- else if( imlInstruction->operation == PPCREC_IML_OP_SLW || imlInstruction->operation == PPCREC_IML_OP_SRW )
- {
- // registerResult = registerOperand1(rA) >> registerOperand2(rB) (up to 63 bits)
-
- if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SRW)
- {
- // use BMI2 SHRX if available
- x64Gen_shrx_reg64_reg64_reg64(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
- }
- else if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SLW)
- {
- // use BMI2 SHLX if available
- x64Gen_shlx_reg64_reg64_reg64(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
- x64Gen_and_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); // trim result to 32bit
- }
- else
- {
- // lazy and slow way to do shift by register without relying on ECX/CL or BMI2
- // each of the low 6 bits of the shift amount is tested and applied individually
- x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1);
- for (sint32 b = 0; b < 6; b++)
- {
- x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1 << b));
- sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex();
- x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set
- if (b == 5)
- {
- // shift amount >= 32 -> result is zero
- x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
- }
- else
- {
- if (imlInstruction->operation == PPCREC_IML_OP_SLW)
- x64Gen_shl_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1 << b));
- else
- x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1 << b));
- }
- PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex());
- }
- x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP);
- }
- }
- else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE )
- {
- // todo: Use BMI2 rotate if available
- // check if CL/ECX/RCX is available
- if( rRegResult != X86_REG_RCX && rRegOperand1 != X86_REG_RCX && rRegOperand2 != X86_REG_RCX )
- {
- // move operand 1 to temp register
- // (done before the swap below: if operand1 and operand2 are the same register, the
- // swap would otherwise replace the value to rotate with the previous content of RCX)
- x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1);
- // swap operand 2 with RCX
- x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2);
- // rotate
- x64Gen_rol_reg64Low32_cl(x64GenContext, REG_RESV_TEMP);
- // undo swap operand 2 with RCX
- x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2);
- // copy to result register
- x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP);
- }
- else
- {
- x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1);
- // lazy and slow way to do shift by register without relying on ECX/CL
- for(sint32 b=0; b<5; b++)
- {
- x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1 << b));
- sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex();
- x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set
- x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1 << b));
- PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex());
- }
- x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP);
- }
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S ||
- imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U ||
- imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
- {
- if(g_CPUFeatures.x86.bmi2)
- {
- if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
- x64Gen_sarx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
- else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
- x64Gen_shrx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
- else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
- x64Gen_shlx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
- }
- else
- {
- // without BMI2 the shift amount is expected to already be in ECX
- cemu_assert_debug(rRegOperand2 == X86_REG_ECX);
- // if the result register is ECX itself, shift in the temp register so the count stays intact
- bool useTempReg = rRegResult == X86_REG_ECX && rRegOperand1 != X86_REG_ECX;
- auto origRegResult = rRegResult;
- if(useTempReg)
- {
- x64GenContext->emitter->MOV_dd(REG_RESV_TEMP, rRegOperand1);
- rRegResult = REG_RESV_TEMP;
- }
- if(rRegOperand1 != rRegResult)
- x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
- if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
- x64GenContext->emitter->SAR_d_CL(rRegResult);
- else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
- x64GenContext->emitter->SHR_d_CL(rRegResult);
- else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
- x64GenContext->emitter->SHL_d_CL(rRegResult);
- if(useTempReg)
- x64GenContext->emitter->MOV_dd(origRegResult, REG_RESV_TEMP);
- }
- }
- else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED )
- {
- // DIV/IDIV implicitly use EDX:EAX, so both registers are saved to hCPU->temporaryGPR first
- x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX);
- x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX);
- // mov operand 2 to temp register
- x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2);
- // mov operand1 to EAX
- x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, X86_REG_EAX, rRegOperand1);
- // sign or zero extend EAX to EDX:EAX based on division sign mode
- if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED )
- x64Gen_cdq(x64GenContext);
- else
- x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, X86_REG_EDX, X86_REG_EDX);
- // make sure we avoid division by zero
- // (the hard-coded jump distance of 3 skips over the divide instruction emitted below)
- // NOTE(review): only a zero divisor is guarded here; a signed INT_MIN / -1 is not - confirm whether that can reach this path
- x64Gen_test_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
- x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 3);
- // divide
- if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED )
- x64Gen_idiv_reg64Low32(x64GenContext, REG_RESV_TEMP);
- else
- x64Gen_div_reg64Low32(x64GenContext, REG_RESV_TEMP);
- // result of division is now stored in EAX, move it to result register
- if( rRegResult != X86_REG_EAX )
- x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, X86_REG_EAX);
- // restore EAX / EDX
- if( rRegResult != X86_REG_RAX )
- x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]));
- if( rRegResult != X86_REG_RDX )
- x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EDX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]));
- }
- else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED )
- {
- // the widening multiply writes EDX:EAX, so both registers are saved to hCPU->temporaryGPR first
- x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX);
- x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX);
- // mov operand 2 to temp register
- x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2);
- // mov operand1 to EAX
- x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, X86_REG_EAX, rRegOperand1);
- // NOTE(review): the EDX initialization below looks redundant because the multiply overwrites
- // EDX with the high half of the product; the emitted code is kept unchanged - confirm before removing
- if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED )
- {
- // clear EDX
- x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, X86_REG_EDX, X86_REG_EDX);
- }
- else
- {
- // sign extend EAX into EDX
- x64Gen_cdq(x64GenContext);
- }
- // multiply
- if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED )
- x64Gen_imul_reg64Low32(x64GenContext, REG_RESV_TEMP);
- else
- x64Gen_mul_reg64Low32(x64GenContext, REG_RESV_TEMP);
- // result of multiplication is now stored in EDX:EAX, move the high half (EDX) to the result register
- if( rRegResult != X86_REG_EDX )
- x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, X86_REG_EDX);
- // restore EAX / EDX
- if( rRegResult != X86_REG_RAX )
- x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]));
- if( rRegResult != X86_REG_RDX )
- x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EDX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]));
- }
- else
- {
- cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation);
- return false;
- }
- return true;
-}
-
-// Emits x64 code for regR = regA + regB with the carry-out written to regCarry as 0 or 1.
-// For ADD_WITH_CARRY, regCarry additionally supplies the carry-in.
-// Returns false for any other operation.
-bool PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
-{
- auto& emitter = x64GenContext->emitter;
- auto regR = _reg32(imlInstruction->op_r_r_r_carry.regR);
- auto regA = _reg32(imlInstruction->op_r_r_r_carry.regA);
- auto regB = _reg32(imlInstruction->op_r_r_r_carry.regB);
- auto regCarry = _reg32(imlInstruction->op_r_r_r_carry.regCarry);
- const bool carryAliasesInput = (regCarry == regA) || (regCarry == regB);
- cemu_assert_debug(regCarry != regR); // two outputs sharing the same register is undefined behavior
-
- const bool isPlainAdd = imlInstruction->operation == PPCREC_IML_OP_ADD;
- const bool isAddWithCarry = imlInstruction->operation == PPCREC_IML_OP_ADD_WITH_CARRY;
- if (!isPlainAdd && !isAddWithCarry)
- {
- cemu_assert_unimplemented();
- return false;
- }
-
- // shared prologue: addition is commutative, so arrange for regA to be the operand that
- // lands in (or already is) the result register
- if (regB == regR)
- std::swap(regB, regA);
- if (regR != regA)
- emitter->MOV_dd(regR, regA);
-
- if (isPlainAdd)
- {
- // the carry register can only be pre-cleared if it does not hold one of the inputs
- if (!carryAliasesInput)
- emitter->XOR_dd(regCarry, regCarry);
- emitter->ADD_dd(regR, regB);
- emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry)); // below condition checks carry flag
- if (carryAliasesInput)
- emitter->AND_di8(regCarry, 1); // clear upper bits
- }
- else
- {
- // assumes that carry is already correctly initialized as 0 or 1
- emitter->BT_du8(regCarry, 0); // copy carry register to x86 carry flag
- emitter->ADC_dd(regR, regB);
- emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry));
- }
- return true;
-}
-
-bool PPCRecompilerX64Gen_IsSameCompare(IMLInstruction* imlInstructionA, IMLInstruction* imlInstructionB)
-{
- if(imlInstructionA->type != imlInstructionB->type)
- return false;
- if(imlInstructionA->type == PPCREC_IML_TYPE_COMPARE)
- return imlInstructionA->op_compare.regA == imlInstructionB->op_compare.regA && imlInstructionA->op_compare.regB == imlInstructionB->op_compare.regB;
- else if(imlInstructionA->type == PPCREC_IML_TYPE_COMPARE_S32)
- return imlInstructionA->op_compare_s32.regA == imlInstructionB->op_compare_s32.regA && imlInstructionA->op_compare_s32.immS32 == imlInstructionB->op_compare_s32.immS32;
- return false;
-}
-
-bool PPCRecompilerX64Gen_imlInstruction_compare_x(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, sint32& extraInstructionsProcessed)
-{
- extraInstructionsProcessed = 0;
- boost::container::static_vector compareInstructions;
- compareInstructions.push_back(imlInstruction);
- for(sint32 i=1; i<4; i++)
- {
- IMLInstruction* nextIns = x64GenContext->GetNextInstruction(i);
- if(!nextIns || !PPCRecompilerX64Gen_IsSameCompare(imlInstruction, nextIns))
- break;
- compareInstructions.push_back(nextIns);
- }
- auto OperandOverlapsWithR = [&](IMLInstruction* ins) -> bool
- {
- cemu_assert_debug(ins->type == PPCREC_IML_TYPE_COMPARE || ins->type == PPCREC_IML_TYPE_COMPARE_S32);
- if(ins->type == PPCREC_IML_TYPE_COMPARE)
- return _reg32_from_reg8(_reg8(ins->op_compare.regR)) == _reg32(ins->op_compare.regA) || _reg32_from_reg8(_reg8(ins->op_compare.regR)) == _reg32(ins->op_compare.regB);
- else /* PPCREC_IML_TYPE_COMPARE_S32 */
- return _reg32_from_reg8(_reg8(ins->op_compare_s32.regR)) == _reg32(ins->op_compare_s32.regA);
- };
- auto GetRegR = [](IMLInstruction* insn)
- {
- return insn->type == PPCREC_IML_TYPE_COMPARE ? _reg32_from_reg8(_reg8(insn->op_compare.regR)) : _reg32_from_reg8(_reg8(insn->op_compare_s32.regR));
- };
- // prefer XOR method for zeroing out registers if possible
- for(auto& it : compareInstructions)
- {
- if(OperandOverlapsWithR(it))
- continue;
- auto regR = GetRegR(it);
- x64GenContext->emitter->XOR_dd(regR, regR); // zero bytes unaffected by SETcc
- }
- // emit the compare instruction
- if(imlInstruction->type == PPCREC_IML_TYPE_COMPARE)
- {
- auto regA = _reg32(imlInstruction->op_compare.regA);
- auto regB = _reg32(imlInstruction->op_compare.regB);
- x64GenContext->emitter->CMP_dd(regA, regB);
- }
- else if(imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32)
- {
- auto regA = _reg32(imlInstruction->op_compare_s32.regA);
- sint32 imm = imlInstruction->op_compare_s32.immS32;
- x64GenContext->emitter->CMP_di32(regA, imm);
- }
- // emit the SETcc instructions
- for(auto& it : compareInstructions)
- {
- auto regR = _reg8(it->op_compare.regR);
- X86Cond cond = _x86Cond(it->op_compare.cond);
- if(OperandOverlapsWithR(it))
- x64GenContext->emitter->MOV_di32(_reg32_from_reg8(regR), 0);
- x64GenContext->emitter->SETcc_b(cond, regR);
- }
- extraInstructionsProcessed = (sint32)compareInstructions.size() - 1;
- return true;
-}
-
-bool PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment)
-{
- auto regBool = _reg8(imlInstruction->op_conditional_jump.registerBool);
- bool mustBeTrue = imlInstruction->op_conditional_jump.mustBeTrue;
- x64GenContext->emitter->TEST_bb(regBool, regBool);
- PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
- x64GenContext->emitter->Jcc_j32(mustBeTrue ? X86_CONDITION_NZ : X86_CONDITION_Z, 0);
- return true;
-}
-
-void PPCRecompilerX64Gen_imlInstruction_x86_eflags_jcc(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment)
-{
- X86Cond cond = _x86Cond(imlInstruction->op_x86_eflags_jcc.cond, imlInstruction->op_x86_eflags_jcc.invertedCondition);
- PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
- x64GenContext->emitter->Jcc_j32(cond, 0);
-}
-
-bool PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment)
-{
- PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
- x64GenContext->emitter->JMP_j32(0);
- return true;
-}
-
-bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
-{
- auto regR = _reg32(imlInstruction->op_r_r_s32.regR);
- auto regA = _reg32(imlInstruction->op_r_r_s32.regA);
- uint32 immS32 = imlInstruction->op_r_r_s32.immS32;
-
- if( imlInstruction->operation == PPCREC_IML_OP_ADD )
- {
- uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32;
- if(regR != regA)
- x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA);
- x64Gen_add_reg64Low32_imm32(x64GenContext, regR, (uint32)immU32);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_SUB)
- {
- if (regR != regA)
- x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA);
- x64Gen_sub_reg64Low32_imm32(x64GenContext, regR, immS32);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_AND ||
- imlInstruction->operation == PPCREC_IML_OP_OR ||
- imlInstruction->operation == PPCREC_IML_OP_XOR)
- {
- if (regR != regA)
- x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA);
- if (imlInstruction->operation == PPCREC_IML_OP_AND)
- x64Gen_and_reg64Low32_imm32(x64GenContext, regR, immS32);
- else if (imlInstruction->operation == PPCREC_IML_OP_OR)
- x64Gen_or_reg64Low32_imm32(x64GenContext, regR, immS32);
- else // XOR
- x64Gen_xor_reg64Low32_imm32(x64GenContext, regR, immS32);
- }
- else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED )
- {
- // registerResult = registerOperand * immS32
- sint32 immS32 = (uint32)imlInstruction->op_r_r_s32.immS32;
- x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (sint64)immS32); // todo: Optimize
- if( regR != regA )
- x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA);
- x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, regR, REG_RESV_TEMP);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT ||
- imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U ||
- imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
- {
- if( regA != regR )
- x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA);
- if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
- x64Gen_shl_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32);
- else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
- x64Gen_shr_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32);
- else // RIGHT_SHIFT_S
- x64Gen_sar_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32);
- }
- else
- {
- debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation);
- return false;
- }
- return true;
-}
-
-bool PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
-{
- auto regR = _reg32(imlInstruction->op_r_r_s32_carry.regR);
- auto regA = _reg32(imlInstruction->op_r_r_s32_carry.regA);
- sint32 immS32 = imlInstruction->op_r_r_s32_carry.immS32;
- auto regCarry = _reg32(imlInstruction->op_r_r_s32_carry.regCarry);
- cemu_assert_debug(regCarry != regR); // we dont allow two different outputs sharing the same register
-
- bool delayCarryInit = regCarry == regA;
-
- switch (imlInstruction->operation)
- {
- case PPCREC_IML_OP_ADD:
- if(!delayCarryInit)
- x64GenContext->emitter->XOR_dd(regCarry, regCarry);
- if (regR != regA)
- x64GenContext->emitter->MOV_dd(regR, regA);
- x64GenContext->emitter->ADD_di32(regR, immS32);
- if(delayCarryInit)
- x64GenContext->emitter->MOV_di32(regCarry, 0);
- x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry));
- break;
- case PPCREC_IML_OP_ADD_WITH_CARRY:
- // assumes that carry is already correctly initialized as 0 or 1
- cemu_assert_debug(regCarry != regR);
- if (regR != regA)
- x64GenContext->emitter->MOV_dd(regR, regA);
- x64GenContext->emitter->BT_du8(regCarry, 0); // copy carry register to x86 carry flag
- x64GenContext->emitter->ADC_di32(regR, immS32);
- x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry));
- break;
- default:
- cemu_assert_unimplemented();
- return false;
- }
- return true;
-}
-
-bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
-{
- // some tests (all performed on a i7-4790K)
- // 1) DEC [mem] + JNS has significantly worse performance than BT + JNC (probably due to additional memory write and direct dependency)
- // 2) CMP [mem], 0 + JG has about equal (or slightly worse) performance than BT + JNC
-
- // BT
- x64Gen_bt_mem8(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative
- cemu_assert_debug(x64GenContext->currentSegment->GetBranchTaken());
- PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, x64GenContext->currentSegment->GetBranchTaken());
- x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0);
- return true;
-}
-
-void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
-{
- uint32 name = imlInstruction->op_r_name.name;
- if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::I64)
- {
- auto regR = _reg64(imlInstruction->op_r_name.regR);
- if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32)
- {
- x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0));
- }
- else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999)
- {
- sint32 sprIndex = (name - PPCREC_NAME_SPR0);
- if (sprIndex == SPR_LR)
- x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.LR));
- else if (sprIndex == SPR_CTR)
- x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.CTR));
- else if (sprIndex == SPR_XER)
- x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.XER));
- else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
- {
- sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0);
- x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, memOffset);
- }
- else
- assert_dbg();
- }
- else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
- {
- x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY));
- }
- else if (name == PPCREC_NAME_XER_CA)
- {
- x64Emit_movZX_reg64_mem8(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_ca));
- }
- else if (name == PPCREC_NAME_XER_SO)
- {
- x64Emit_movZX_reg64_mem8(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_so));
- }
- else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
- {
- x64Emit_movZX_reg64_mem8(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR));
- }
- else if (name == PPCREC_NAME_CPU_MEMRES_EA)
- {
- x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr));
- }
- else if (name == PPCREC_NAME_CPU_MEMRES_VAL)
- {
- x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue));
- }
- else
- assert_dbg();
- }
- else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64)
- {
- auto regR = _regF64(imlInstruction->op_r_name.regR);
- if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64))
- {
- sint32 regIndex = (name - PPCREC_NAME_FPR_HALF) / 2;
- sint32 pairIndex = (name - PPCREC_NAME_FPR_HALF) % 2;
- x64Gen_movsd_xmmReg_memReg64(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + pairIndex * sizeof(double));
- }
- else if (name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
- {
- x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));
- }
- else
- {
- cemu_assert_debug(false);
- }
- }
- else
- DEBUG_BREAK;
-
-}
-
-void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
-{
- uint32 name = imlInstruction->op_r_name.name;
-
- if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::I64)
- {
- auto regR = _reg64(imlInstruction->op_r_name.regR);
- if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32)
- {
- x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0), regR);
- }
- else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999)
- {
- uint32 sprIndex = (name - PPCREC_NAME_SPR0);
- if (sprIndex == SPR_LR)
- x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.LR), regR);
- else if (sprIndex == SPR_CTR)
- x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.CTR), regR);
- else if (sprIndex == SPR_XER)
- x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.XER), regR);
- else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
- {
- sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0);
- x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, memOffset, regR);
- }
- else
- assert_dbg();
- }
- else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
- {
- x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), regR);
- }
- else if (name == PPCREC_NAME_XER_CA)
- {
- x64GenContext->emitter->MOV_bb_l(REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg64(regR));
- }
- else if (name == PPCREC_NAME_XER_SO)
- {
- x64GenContext->emitter->MOV_bb_l(REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg64(regR));
- }
- else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
- {
- x64GenContext->emitter->MOV_bb_l(REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg64(regR));
- }
- else if (name == PPCREC_NAME_CPU_MEMRES_EA)
- {
- x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr), regR);
- }
- else if (name == PPCREC_NAME_CPU_MEMRES_VAL)
- {
- x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue), regR);
- }
- else
- assert_dbg();
- }
- else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64)
- {
- auto regR = _regF64(imlInstruction->op_r_name.regR);
- uint32 name = imlInstruction->op_r_name.name;
- if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64))
- {
- sint32 regIndex = (name - PPCREC_NAME_FPR_HALF) / 2;
- sint32 pairIndex = (name - PPCREC_NAME_FPR_HALF) % 2;
- x64Gen_movsd_memReg64_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + (pairIndex ? sizeof(double) : 0));
- }
- else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
- {
- x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));
- }
- else
- {
- cemu_assert_debug(false);
- }
- }
- else
- DEBUG_BREAK;
-
-
-}
-
-uint8* codeMemoryBlock = nullptr;
-sint32 codeMemoryBlockIndex = 0;
-sint32 codeMemoryBlockSize = 0;
-
-std::mutex mtx_allocExecutableMemory;
-
-uint8* PPCRecompilerX86_allocateExecutableMemory(sint32 size)
-{
- std::lock_guard lck(mtx_allocExecutableMemory);
- if( codeMemoryBlockIndex+size > codeMemoryBlockSize )
- {
- // allocate new block
- codeMemoryBlockSize = std::max(1024*1024*4, size+1024); // 4MB (or more if the function is larger than 4MB)
- codeMemoryBlockIndex = 0;
- codeMemoryBlock = (uint8*)MemMapper::AllocateMemory(nullptr, codeMemoryBlockSize, MemMapper::PAGE_PERMISSION::P_RWX);
- }
- uint8* codeMem = codeMemoryBlock + codeMemoryBlockIndex;
- codeMemoryBlockIndex += size;
- // pad to 4 byte alignment
- while (codeMemoryBlockIndex & 3)
- {
- codeMemoryBlock[codeMemoryBlockIndex] = 0x90;
- codeMemoryBlockIndex++;
- }
- return codeMem;
-}
-
-bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext)
-{
- x64GenContext_t x64GenContext{};
-
- // generate iml instruction code
- bool codeGenerationFailed = false;
- for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
- {
- x64GenContext.currentSegment = segIt;
- segIt->x64Offset = x64GenContext.emitter->GetWriteIndex();
- for(size_t i=0; iimlList.size(); i++)
- {
- x64GenContext.m_currentInstructionEmitIndex = i;
- IMLInstruction* imlInstruction = segIt->imlList.data() + i;
-
- if( imlInstruction->type == PPCREC_IML_TYPE_R_NAME )
- {
- PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
- }
- else if( imlInstruction->type == PPCREC_IML_TYPE_NAME_R )
- {
- PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
- }
- else if( imlInstruction->type == PPCREC_IML_TYPE_R_R )
- {
- if( PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false )
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32)
- {
- if (PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32)
- {
- if (PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32_CARRY)
- {
- if (PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R)
- {
- if (PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R_CARRY)
- {
- if (PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
- codeGenerationFailed = true;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE || imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32)
- {
- sint32 extraInstructionsProcessed;
- PPCRecompilerX64Gen_imlInstruction_compare_x(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, extraInstructionsProcessed);
- i += extraInstructionsProcessed;
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
- {
- if (PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false)
- codeGenerationFailed = true;
- }
- else if(imlInstruction->type == PPCREC_IML_TYPE_X86_EFLAGS_JCC)
- {
- PPCRecompilerX64Gen_imlInstruction_x86_eflags_jcc(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt);
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_JUMP)
- {
- if (PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false)
- codeGenerationFailed = true;
- }
- else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK )
- {
- PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
- }
- else if( imlInstruction->type == PPCREC_IML_TYPE_MACRO )
- {
- if( PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false )
- {
- codeGenerationFailed = true;
- }
- }
- else if( imlInstruction->type == PPCREC_IML_TYPE_LOAD )
- {
- if( PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false) == false )
- {
- codeGenerationFailed = true;
- }
- }
- else if( imlInstruction->type == PPCREC_IML_TYPE_LOAD_INDEXED )
- {
- if( PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true) == false )
- {
- codeGenerationFailed = true;
- }
- }
- else if( imlInstruction->type == PPCREC_IML_TYPE_STORE )
- {
- if( PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false) == false )
- {
- codeGenerationFailed = true;
- }
- }
- else if( imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED )
- {
- if( PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true) == false )
- {
- codeGenerationFailed = true;
- }
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
- {
- PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_CALL_IMM)
- {
- PPCRecompilerX64Gen_imlInstruction_call_imm(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
- }
- else if( imlInstruction->type == PPCREC_IML_TYPE_NO_OP )
- {
- // no op
- }
- else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD )
- {
- if( PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false) == false )
- {
- codeGenerationFailed = true;
- }
- }
- else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED )
- {
- if( PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true) == false )
- {
- codeGenerationFailed = true;
- }
- }
- else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE )
- {
- if( PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false) == false )
- {
- codeGenerationFailed = true;
- }
- }
- else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED )
- {
- if( PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true) == false )
- {
- codeGenerationFailed = true;
- }
- }
- else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R )
- {
- PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
- }
- else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R )
- {
- PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
- }
- else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R )
- {
- PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
- }
- else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R )
- {
- PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_COMPARE)
- {
- PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
- }
- else
- {
- debug_printf("PPCRecompiler_generateX64Code(): Unsupported iml type 0x%x\n", imlInstruction->type);
- assert_dbg();
- }
- }
- }
- // handle failed code generation
- if( codeGenerationFailed )
- {
- return false;
- }
- // allocate executable memory
- uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes());
- size_t baseAddress = (size_t)executableMemory;
- // fix relocs
- for(auto& relocIt : x64GenContext.relocateOffsetTable2)
- {
- // search for segment that starts with this offset
- uint32 ppcOffset = (uint32)(size_t)relocIt.extraInfo;
- uint32 x64Offset = 0xFFFFFFFF;
-
- IMLSegment* destSegment = (IMLSegment*)relocIt.extraInfo;
- x64Offset = destSegment->x64Offset;
-
- uint32 relocBase = relocIt.offset;
- uint8* relocInstruction = x64GenContext.emitter->GetBufferPtr()+relocBase;
- if( relocInstruction[0] == 0x0F && (relocInstruction[1] >= 0x80 && relocInstruction[1] <= 0x8F) )
- {
- // Jcc relativeImm32
- sint32 distanceNearJump = (sint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 2));
- if (distanceNearJump >= -128 && distanceNearJump < 127) // disabled
- {
- // convert to near Jcc
- *(uint8*)(relocInstruction + 0) = (uint8)(relocInstruction[1]-0x80 + 0x70);
- // patch offset
- *(uint8*)(relocInstruction + 1) = (uint8)distanceNearJump;
- // replace unused 4 bytes with NOP instruction
- relocInstruction[2] = 0x0F;
- relocInstruction[3] = 0x1F;
- relocInstruction[4] = 0x40;
- relocInstruction[5] = 0x00;
- }
- else
- {
- // patch offset
- *(uint32*)(relocInstruction + 2) = (uint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 6));
- }
- }
- else if( relocInstruction[0] == 0xE9 )
- {
- // JMP relativeImm32
- *(uint32*)(relocInstruction+1) = (uint32)((baseAddress+x64Offset)-(baseAddress+relocBase+5));
- }
- else
- assert_dbg();
- }
-
- // copy code to executable memory
- std::span codeBuffer = x64GenContext.emitter->GetBuffer();
- memcpy(executableMemory, codeBuffer.data(), codeBuffer.size_bytes());
- // set code
- PPCRecFunction->x86Code = executableMemory;
- PPCRecFunction->x86Size = codeBuffer.size_bytes();
- return true;
-}
-
-void PPCRecompilerX64Gen_generateEnterRecompilerCode()
-{
- x64GenContext_t x64GenContext{};
-
- // start of recompiler entry function (15 regs)
- x64Gen_push_reg64(&x64GenContext, X86_REG_RAX);
- x64Gen_push_reg64(&x64GenContext, X86_REG_RCX);
- x64Gen_push_reg64(&x64GenContext, X86_REG_RDX);
- x64Gen_push_reg64(&x64GenContext, X86_REG_RBX);
- x64Gen_push_reg64(&x64GenContext, X86_REG_RBP);
- x64Gen_push_reg64(&x64GenContext, X86_REG_RDI);
- x64Gen_push_reg64(&x64GenContext, X86_REG_RSI);
- x64Gen_push_reg64(&x64GenContext, X86_REG_R8);
- x64Gen_push_reg64(&x64GenContext, X86_REG_R9);
- x64Gen_push_reg64(&x64GenContext, X86_REG_R10);
- x64Gen_push_reg64(&x64GenContext, X86_REG_R11);
- x64Gen_push_reg64(&x64GenContext, X86_REG_R12);
- x64Gen_push_reg64(&x64GenContext, X86_REG_R13);
- x64Gen_push_reg64(&x64GenContext, X86_REG_R14);
- x64Gen_push_reg64(&x64GenContext, X86_REG_R15);
-
- // 000000007775EF04 | E8 00 00 00 00 call +0x00
- x64Gen_writeU8(&x64GenContext, 0xE8);
- x64Gen_writeU8(&x64GenContext, 0x00);
- x64Gen_writeU8(&x64GenContext, 0x00);
- x64Gen_writeU8(&x64GenContext, 0x00);
- x64Gen_writeU8(&x64GenContext, 0x00);
- //000000007775EF09 | 48 83 04 24 05 add qword ptr ss:[rsp],5
- x64Gen_writeU8(&x64GenContext, 0x48);
- x64Gen_writeU8(&x64GenContext, 0x83);
- x64Gen_writeU8(&x64GenContext, 0x04);
- x64Gen_writeU8(&x64GenContext, 0x24);
- uint32 jmpPatchOffset = x64GenContext.emitter->GetWriteIndex();
- x64Gen_writeU8(&x64GenContext, 0); // skip the distance until after the JMP
- x64Emit_mov_mem64_reg64(&x64GenContext, X86_REG_RDX, offsetof(PPCInterpreter_t, rspTemp), X86_REG_RSP);
-
- // MOV RSP, RDX (ppc interpreter instance)
- x64Gen_mov_reg64_reg64(&x64GenContext, REG_RESV_HCPU, X86_REG_RDX);
- // MOV R15, ppcRecompilerInstanceData
- x64Gen_mov_reg64_imm64(&x64GenContext, REG_RESV_RECDATA, (uint64)ppcRecompilerInstanceData);
- // MOV R13, memory_base
- x64Gen_mov_reg64_imm64(&x64GenContext, REG_RESV_MEMBASE, (uint64)memory_base);
-
- //JMP recFunc
- x64Gen_jmp_reg64(&x64GenContext, X86_REG_RCX); // call argument 1
-
- x64GenContext.emitter->GetBuffer()[jmpPatchOffset] = (x64GenContext.emitter->GetWriteIndex() -(jmpPatchOffset-4));
-
- //recompilerExit1:
- x64Gen_pop_reg64(&x64GenContext, X86_REG_R15);
- x64Gen_pop_reg64(&x64GenContext, X86_REG_R14);
- x64Gen_pop_reg64(&x64GenContext, X86_REG_R13);
- x64Gen_pop_reg64(&x64GenContext, X86_REG_R12);
- x64Gen_pop_reg64(&x64GenContext, X86_REG_R11);
- x64Gen_pop_reg64(&x64GenContext, X86_REG_R10);
- x64Gen_pop_reg64(&x64GenContext, X86_REG_R9);
- x64Gen_pop_reg64(&x64GenContext, X86_REG_R8);
- x64Gen_pop_reg64(&x64GenContext, X86_REG_RSI);
- x64Gen_pop_reg64(&x64GenContext, X86_REG_RDI);
- x64Gen_pop_reg64(&x64GenContext, X86_REG_RBP);
- x64Gen_pop_reg64(&x64GenContext, X86_REG_RBX);
- x64Gen_pop_reg64(&x64GenContext, X86_REG_RDX);
- x64Gen_pop_reg64(&x64GenContext, X86_REG_RCX);
- x64Gen_pop_reg64(&x64GenContext, X86_REG_RAX);
- // RET
- x64Gen_ret(&x64GenContext);
-
- uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes());
- // copy code to executable memory
- memcpy(executableMemory, x64GenContext.emitter->GetBuffer().data(), x64GenContext.emitter->GetBuffer().size_bytes());
- PPCRecompiler_enterRecompilerCode = (void ATTR_MS_ABI (*)(uint64,uint64))executableMemory;
-}
-
-
-void* PPCRecompilerX64Gen_generateLeaveRecompilerCode()
-{
- x64GenContext_t x64GenContext{};
-
- // update instruction pointer
- // LR is in EDX
- x64Emit_mov_mem32_reg32(&x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer), X86_REG_EDX);
- // MOV RSP, [hCPU->rspTemp]
- x64Emit_mov_reg64_mem64(&x64GenContext, X86_REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp));
- // RET
- x64Gen_ret(&x64GenContext);
-
- uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes());
- // copy code to executable memory
- memcpy(executableMemory, x64GenContext.emitter->GetBuffer().data(), x64GenContext.emitter->GetBuffer().size_bytes());
- return executableMemory;
-}
-
-void PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions()
-{
- PPCRecompilerX64Gen_generateEnterRecompilerCode();
- PPCRecompiler_leaveRecompilerCode_unvisited = (void ATTR_MS_ABI (*)())PPCRecompilerX64Gen_generateLeaveRecompilerCode();
- PPCRecompiler_leaveRecompilerCode_visited = (void ATTR_MS_ABI (*)())PPCRecompilerX64Gen_generateLeaveRecompilerCode();
- cemu_assert_debug(PPCRecompiler_leaveRecompilerCode_unvisited != PPCRecompiler_leaveRecompilerCode_visited);
-}
-
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp
deleted file mode 100644
index 6a8b1b97..00000000
--- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp
+++ /dev/null
@@ -1,469 +0,0 @@
-#include "../PPCRecompiler.h"
-#include "../IML/IML.h"
-#include "BackendX64.h"
-#include "Common/cpu_features.h"
-
-uint32 _regF64(IMLReg physReg);
-
-uint32 _regI32(IMLReg r)
-{
- cemu_assert_debug(r.GetRegFormat() == IMLRegFormat::I32);
- return (uint32)r.GetRegID();
-}
-
-static x86Assembler64::GPR32 _reg32(sint8 physRegId)
-{
- return (x86Assembler64::GPR32)physRegId;
-}
-
-static x86Assembler64::GPR8_REX _reg8(IMLReg r)
-{
- cemu_assert_debug(r.GetRegFormat() == IMLRegFormat::I32); // currently bool regs are implemented as 32bit registers
- return (x86Assembler64::GPR8_REX)r.GetRegID();
-}
-
-static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId)
-{
- return (x86Assembler64::GPR32)regId;
-}
-
-static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId)
-{
- return (x86Assembler64::GPR8_REX)regId;
-}
-
-// load from memory
-bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
-{
- sint32 realRegisterXMM = _regF64(imlInstruction->op_storeLoad.registerData);
- sint32 realRegisterMem = _regI32(imlInstruction->op_storeLoad.registerMem);
- sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
- if( indexed )
- realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2);
- uint8 mode = imlInstruction->op_storeLoad.mode;
-
- if( mode == PPCREC_FPR_LD_MODE_SINGLE )
- {
- // load byte swapped single into temporary FPR
- if( indexed )
- {
- x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
- x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
- if(g_CPUFeatures.x86.movbe)
- x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
- else
- x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
- }
- else
- {
- if(g_CPUFeatures.x86.movbe)
- x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
- else
- x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
- }
- if(g_CPUFeatures.x86.movbe == false )
- x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
- x64Gen_movd_xmmReg_reg64Low32(x64GenContext, realRegisterXMM, REG_RESV_TEMP);
-
- if (imlInstruction->op_storeLoad.flags2.notExpanded)
- {
- // leave value as single
- }
- else
- {
- x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, realRegisterXMM);
- }
- }
- else if( mode == PPCREC_FPR_LD_MODE_DOUBLE )
- {
- if( g_CPUFeatures.x86.avx )
- {
- if( indexed )
- {
- // calculate offset
- x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
- x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
- // load value
- x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0);
- x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP);
- x64Gen_movq_xmmReg_reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP);
- x64Gen_movsd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_FPR_TEMP);
- }
- else
- {
- x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+0);
- x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP);
- x64Gen_movq_xmmReg_reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP);
- x64Gen_movsd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_FPR_TEMP);
- }
- }
- else
- {
- if( indexed )
- {
- // calculate offset
- x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
- x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
- // load double low part to temporaryFPR
- x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0);
- x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
- x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP);
- // calculate offset again
- x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
- x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
- // load double high part to temporaryFPR
- x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+4);
- x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
- x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP);
- // load double from temporaryFPR
- x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR));
- }
- else
- {
- // load double low part to temporaryFPR
- x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+0);
- x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
- x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP);
- // load double high part to temporaryFPR
- x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+4);
- x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
- x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP);
- // load double from temporaryFPR
- x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR));
- }
- }
- }
- else
- {
- return false;
- }
- return true;
-}
-
-// store to memory
-bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
-{
- sint32 realRegisterXMM = _regF64(imlInstruction->op_storeLoad.registerData);
- sint32 realRegisterMem = _regI32(imlInstruction->op_storeLoad.registerMem);
- sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
- if( indexed )
- realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2);
- uint8 mode = imlInstruction->op_storeLoad.mode;
- if( mode == PPCREC_FPR_ST_MODE_SINGLE )
- {
- if (imlInstruction->op_storeLoad.flags2.notExpanded)
- {
- // value is already in single format
- x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM);
- }
- else
- {
- x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, realRegisterXMM);
- x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP);
- }
- if(g_CPUFeatures.x86.movbe == false )
- x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
- if( indexed )
- {
- if( realRegisterMem == realRegisterMem2 )
- assert_dbg();
- x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
- }
- if(g_CPUFeatures.x86.movbe)
- x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
- else
- x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
- if( indexed )
- {
- x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
- }
- }
- else if( mode == PPCREC_FPR_ST_MODE_DOUBLE )
- {
- if( indexed )
- {
- if( realRegisterMem == realRegisterMem2 )
- assert_dbg();
- x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
- }
- x64Gen_movsd_memReg64_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR));
- // store double low part
- x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+0);
- x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
- x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+4, REG_RESV_TEMP);
- // store double high part
- x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+4);
- x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
- x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+0, REG_RESV_TEMP);
- if( indexed )
- {
- x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
- }
- }
- else if( mode == PPCREC_FPR_ST_MODE_UI32_FROM_PS0 )
- {
- x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM);
- x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
- if( indexed )
- {
- cemu_assert_debug(realRegisterMem == realRegisterMem2);
- x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
- x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
- x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
- }
- else
- {
- x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
- }
- }
- else
- {
- debug_printf("PPCRecompilerX64Gen_imlInstruction_fpr_store(): Unsupported mode %d\n", mode);
- return false;
- }
- return true;
-}
-
-// FPR op FPR
-void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
-{
- if( imlInstruction->operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT )
- {
- uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regR);
- uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regA);
- x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, regGpr, regFpr);
- return;
- }
- else if( imlInstruction->operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT )
- {
- uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regR);
- uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regA);
- x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext, regFpr, regGpr);
- return;
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
- {
- cemu_assert_debug(imlInstruction->op_fpr_r_r.regR.GetRegFormat() == IMLRegFormat::F64); // assuming target is always F64 for now
- cemu_assert_debug(imlInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::I32); // supporting only 32bit floats as input for now
- // exact operation depends on size of types. Floats are automatically promoted to double if the target is F64
- uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regR);
- if (imlInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::I32)
- {
- uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regA);
- x64Gen_movq_xmmReg_reg64(x64GenContext, regFpr, regGpr); // using reg32 as reg64 param here is ok. We'll refactor later
- // float to double
- x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regFpr, regFpr);
- }
- else
- {
- cemu_assert_unimplemented();
- }
- return;
- }
-
- uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR);
- uint32 regA = _regF64(imlInstruction->op_fpr_r_r.regA);
- if( imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN )
- {
- x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA);
- }
- else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY )
- {
- x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regA);
- }
- else if( imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE )
- {
- x64Gen_divsd_xmmReg_xmmReg(x64GenContext, regR, regA);
- }
- else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD )
- {
- x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regA);
- }
- else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB )
- {
- x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regA);
- }
- else if( imlInstruction->operation == PPCREC_IML_OP_FPR_FCTIWZ )
- {
- x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, regA);
- x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
- // move to FPR register
- x64Gen_movq_xmmReg_reg64(x64GenContext, regR, REG_RESV_TEMP);
- }
- else
- {
- assert_dbg();
- }
-}
-
-/*
- * FPR = op (fprA, fprB)
- */
-void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
-{
- uint32 regR = _regF64(imlInstruction->op_fpr_r_r_r.regR);
- uint32 regA = _regF64(imlInstruction->op_fpr_r_r_r.regA);
- uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r.regB);
-
- if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY)
- {
- if (regR == regA)
- {
- x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regB);
- }
- else if (regR == regB)
- {
- x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regA);
- }
- else
- {
- x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA);
- x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regB);
- }
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD)
- {
- // todo: Use AVX 3-operand VADDSD if available
- if (regR == regA)
- {
- x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regB);
- }
- else if (regR == regB)
- {
- x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regA);
- }
- else
- {
- x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA);
- x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regB);
- }
- }
- else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB )
- {
- if( regR == regA )
- {
- x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regB);
- }
- else if( regR == regB )
- {
- x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
- x64Gen_subsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB);
- x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
- }
- else
- {
- x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA);
- x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regB);
- }
- }
- else
- assert_dbg();
-}
-
-/*
- * FPR = op (fprA, fprB, fprC)
- */
-void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
-{
- uint32 regR = _regF64(imlInstruction->op_fpr_r_r_r_r.regR);
- uint32 regA = _regF64(imlInstruction->op_fpr_r_r_r_r.regA);
- uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r_r.regB);
- uint32 regC = _regF64(imlInstruction->op_fpr_r_r_r_r.regC);
-
- if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT )
- {
- x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
- sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
- x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0);
- // select C
- x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regC);
- sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex();
- x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
- // select B
- PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
- x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regB);
- // end
- PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
- }
- else
- assert_dbg();
-}
-
-void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
-{
- uint32 regR = _regF64(imlInstruction->op_fpr_r.regR);
-
- if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE )
- {
- x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom));
- }
- else if( imlInstruction->operation == PPCREC_IML_OP_FPR_LOAD_ONE )
- {
- x64Gen_movsd_xmmReg_memReg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble1_1));
- }
- else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS )
- {
- x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom));
- }
- else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS )
- {
- x64Gen_orps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom));
- }
- else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM )
- {
- // convert to 32bit single
- x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, regR, regR);
- // convert back to 64bit double
- x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR);
- }
- else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64)
- {
- // convert bottom to 64bit double
- x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR);
- }
- else
- {
- cemu_assert_unimplemented();
- }
-}
-
-void PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
-{
- auto regR = _reg8(imlInstruction->op_fpr_compare.regR);
- auto regA = _regF64(imlInstruction->op_fpr_compare.regA);
- auto regB = _regF64(imlInstruction->op_fpr_compare.regB);
-
- x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR));
- x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, regA, regB);
-
- if (imlInstruction->op_fpr_compare.cond == IMLCondition::UNORDERED_GT)
- {
- // GT case can be covered with a single SETnbe which checks CF==0 && ZF==0 (unordered sets both)
- x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_NBE, regR);
- return;
- }
- else if (imlInstruction->op_fpr_compare.cond == IMLCondition::UNORDERED_U)
- {
- // unordered case can be checked via PF
- x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_PE, regR);
- return;
- }
-
- // remember unordered state
- auto regTmp = _reg32_from_reg8(_reg32(REG_RESV_TEMP));
- x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_PO, regTmp); // by reversing the parity we can avoid having to XOR the value for masking the LT/EQ conditions
-
- X86Cond x86Cond;
- switch (imlInstruction->op_fpr_compare.cond)
- {
- case IMLCondition::UNORDERED_LT:
- x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_B, regR);
- break;
- case IMLCondition::UNORDERED_EQ:
- x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_Z, regR);
- break;
- default:
- cemu_assert_unimplemented();
- }
- x64GenContext->emitter->AND_bb(_reg8_from_reg32(regR), _reg8_from_reg32(regTmp)); // if unordered (PF=1) then force LT/GT/EQ to zero
-}
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h
deleted file mode 100644
index eae3835d..00000000
--- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h
+++ /dev/null
@@ -1,4335 +0,0 @@
-#pragma once
-
-// x86-64 assembler/emitter
-// auto generated. Do not edit this file manually
-
-typedef unsigned long long u64;
-typedef unsigned int u32;
-typedef unsigned short u16;
-typedef unsigned char u8;
-typedef signed long long s64;
-typedef signed int s32;
-typedef signed short s16;
-typedef signed char s8;
-
-enum X86Reg : sint8
-{
- X86_REG_NONE = -1,
- X86_REG_EAX = 0,
- X86_REG_ECX = 1,
- X86_REG_EDX = 2,
- X86_REG_EBX = 3,
- X86_REG_ESP = 4,
- X86_REG_EBP = 5,
- X86_REG_ESI = 6,
- X86_REG_EDI = 7,
- X86_REG_R8D = 8,
- X86_REG_R9D = 9,
- X86_REG_R10D = 10,
- X86_REG_R11D = 11,
- X86_REG_R12D = 12,
- X86_REG_R13D = 13,
- X86_REG_R14D = 14,
- X86_REG_R15D = 15,
- X86_REG_RAX = 0,
- X86_REG_RCX = 1,
- X86_REG_RDX = 2,
- X86_REG_RBX = 3,
- X86_REG_RSP = 4,
- X86_REG_RBP = 5,
- X86_REG_RSI = 6,
- X86_REG_RDI = 7,
- X86_REG_R8 = 8,
- X86_REG_R9 = 9,
- X86_REG_R10 = 10,
- X86_REG_R11 = 11,
- X86_REG_R12 = 12,
- X86_REG_R13 = 13,
- X86_REG_R14 = 14,
- X86_REG_R15 = 15
-};
-
-enum X86Cond : u8
-{
- X86_CONDITION_O = 0,
- X86_CONDITION_NO = 1,
- X86_CONDITION_B = 2,
- X86_CONDITION_NB = 3,
- X86_CONDITION_Z = 4,
- X86_CONDITION_NZ = 5,
- X86_CONDITION_BE = 6,
- X86_CONDITION_NBE = 7,
- X86_CONDITION_S = 8,
- X86_CONDITION_NS = 9,
- X86_CONDITION_PE = 10,
- X86_CONDITION_PO = 11,
- X86_CONDITION_L = 12,
- X86_CONDITION_NL = 13,
- X86_CONDITION_LE = 14,
- X86_CONDITION_NLE = 15
-};
-class x86Assembler64
-{
-private:
- std::vector m_buffer;
-
-public:
- u8* GetBufferPtr() { return m_buffer.data(); };
- std::span GetBuffer() { return m_buffer; };
- u32 GetWriteIndex() { return (u32)m_buffer.size(); };
- void _emitU8(u8 v) { m_buffer.emplace_back(v); };
- void _emitU16(u16 v) { size_t writeIdx = m_buffer.size(); m_buffer.resize(writeIdx + 2); *(u16*)(m_buffer.data() + writeIdx) = v; };
- void _emitU32(u32 v) { size_t writeIdx = m_buffer.size(); m_buffer.resize(writeIdx + 4); *(u32*)(m_buffer.data() + writeIdx) = v; };
- void _emitU64(u64 v) { size_t writeIdx = m_buffer.size(); m_buffer.resize(writeIdx + 8); *(u64*)(m_buffer.data() + writeIdx) = v; };
- using GPR64 = X86Reg;
- using GPR32 = X86Reg;
- using GPR8_REX = X86Reg;
- void LockPrefix() { _emitU8(0xF0); };
- void ADD_bb(GPR8_REX dst, GPR8_REX src)
- {
- if ((src >= 4) || (dst >= 4))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x00);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void ADD_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src >= 4) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x00);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void ADD_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst >= 4) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x02);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void ADD_dd(GPR32 dst, GPR32 src)
- {
- if (((src & 8) != 0) || ((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x01);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void ADD_qq(GPR64 dst, GPR64 src)
- {
- _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- _emitU8(0x01);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void ADD_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src & 8) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x01);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void ADD_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x01);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void ADD_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst & 8) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x03);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void ADD_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x03);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void OR_bb(GPR8_REX dst, GPR8_REX src)
- {
- if ((src >= 4) || (dst >= 4))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x08);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void OR_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src >= 4) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x08);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void OR_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst >= 4) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x0a);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void OR_dd(GPR32 dst, GPR32 src)
- {
- if (((src & 8) != 0) || ((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x09);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void OR_qq(GPR64 dst, GPR64 src)
- {
- _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- _emitU8(0x09);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void OR_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src & 8) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x09);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void OR_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x09);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void OR_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst & 8) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x0b);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void OR_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x0b);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void ADC_bb(GPR8_REX dst, GPR8_REX src)
- {
- if ((src >= 4) || (dst >= 4))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x10);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void ADC_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src >= 4) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x10);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void ADC_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst >= 4) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x12);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void ADC_dd(GPR32 dst, GPR32 src)
- {
- if (((src & 8) != 0) || ((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x11);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void ADC_qq(GPR64 dst, GPR64 src)
- {
- _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- _emitU8(0x11);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void ADC_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src & 8) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x11);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void ADC_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x11);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void ADC_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst & 8) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x13);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void ADC_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x13);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SBB_bb(GPR8_REX dst, GPR8_REX src)
- {
- if ((src >= 4) || (dst >= 4))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x18);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void SBB_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src >= 4) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x18);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SBB_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst >= 4) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x1a);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SBB_dd(GPR32 dst, GPR32 src)
- {
- if (((src & 8) != 0) || ((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x19);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void SBB_qq(GPR64 dst, GPR64 src)
- {
- _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- _emitU8(0x19);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void SBB_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src & 8) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x19);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SBB_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x19);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SBB_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst & 8) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x1b);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SBB_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x1b);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void AND_bb(GPR8_REX dst, GPR8_REX src)
- {
- if ((src >= 4) || (dst >= 4))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x20);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void AND_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src >= 4) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x20);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void AND_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst >= 4) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x22);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void AND_dd(GPR32 dst, GPR32 src)
- {
- if (((src & 8) != 0) || ((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x21);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void AND_qq(GPR64 dst, GPR64 src)
- {
- _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- _emitU8(0x21);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void AND_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src & 8) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x21);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void AND_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x21);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void AND_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst & 8) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x23);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void AND_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x23);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SUB_bb(GPR8_REX dst, GPR8_REX src)
- {
- if ((src >= 4) || (dst >= 4))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x28);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void SUB_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src >= 4) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x28);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SUB_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst >= 4) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x2a);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SUB_dd(GPR32 dst, GPR32 src)
- {
- if (((src & 8) != 0) || ((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x29);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void SUB_qq(GPR64 dst, GPR64 src)
- {
- _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- _emitU8(0x29);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void SUB_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src & 8) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x29);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SUB_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x29);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SUB_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst & 8) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x2b);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SUB_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x2b);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void XOR_bb(GPR8_REX dst, GPR8_REX src)
- {
- if ((src >= 4) || (dst >= 4))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x30);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void XOR_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src >= 4) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x30);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void XOR_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst >= 4) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x32);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void XOR_dd(GPR32 dst, GPR32 src)
- {
- if (((src & 8) != 0) || ((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x31);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void XOR_qq(GPR64 dst, GPR64 src)
- {
- _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- _emitU8(0x31);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void XOR_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src & 8) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x31);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void XOR_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x31);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void XOR_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst & 8) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x33);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void XOR_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x33);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void CMP_bb(GPR8_REX dst, GPR8_REX src)
- {
- if ((src >= 4) || (dst >= 4))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x38);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void CMP_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src >= 4) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x38);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void CMP_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst >= 4) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x3a);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void CMP_dd(GPR32 dst, GPR32 src)
- {
- if (((src & 8) != 0) || ((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x39);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void CMP_qq(GPR64 dst, GPR64 src)
- {
- _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- _emitU8(0x39);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void CMP_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src & 8) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x39);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void CMP_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x39);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void CMP_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst & 8) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x3b);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void CMP_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x3b);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void ADD_di32(GPR32 dst, s32 imm)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0x81);
- _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7));
- _emitU32((u32)imm);
- }
- void ADD_qi32(GPR64 dst, s32 imm)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0x81);
- _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7));
- _emitU32((u32)imm);
- }
- void ADD_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0x81);
- _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU32((u32)imm);
- }
- void ADD_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x81);
- _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU32((u32)imm);
- }
- void OR_di32(GPR32 dst, s32 imm)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0x81);
- _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7));
- _emitU32((u32)imm);
- }
- void OR_qi32(GPR64 dst, s32 imm)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0x81);
- _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7));
- _emitU32((u32)imm);
- }
- void OR_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0x81);
- _emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU32((u32)imm);
- }
- void OR_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x81);
- _emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU32((u32)imm);
- }
- void ADC_di32(GPR32 dst, s32 imm)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0x81);
- _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7));
- _emitU32((u32)imm);
- }
- void ADC_qi32(GPR64 dst, s32 imm)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0x81);
- _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7));
- _emitU32((u32)imm);
- }
- void ADC_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0x81);
- _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU32((u32)imm);
- }
- void ADC_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x81);
- _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU32((u32)imm);
- }
- void SBB_di32(GPR32 dst, s32 imm)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0x81);
- _emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7));
- _emitU32((u32)imm);
- }
- void SBB_qi32(GPR64 dst, s32 imm)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0x81);
- _emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7));
- _emitU32((u32)imm);
- }
- void SBB_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0x81);
- _emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU32((u32)imm);
- }
- void SBB_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x81);
- _emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU32((u32)imm);
- }
- void AND_di32(GPR32 dst, s32 imm)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0x81);
- _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
- _emitU32((u32)imm);
- }
- void AND_qi32(GPR64 dst, s32 imm)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0x81);
- _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
- _emitU32((u32)imm);
- }
- void AND_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0x81);
- _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU32((u32)imm);
- }
- void AND_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x81);
- _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU32((u32)imm);
- }
- void SUB_di32(GPR32 dst, s32 imm)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0x81);
- _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7));
- _emitU32((u32)imm);
- }
- void SUB_qi32(GPR64 dst, s32 imm)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0x81);
- _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7));
- _emitU32((u32)imm);
- }
- void SUB_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0x81);
- _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU32((u32)imm);
- }
- void SUB_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x81);
- _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU32((u32)imm);
- }
- void XOR_di32(GPR32 dst, s32 imm)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0x81);
- _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7));
- _emitU32((u32)imm);
- }
- void XOR_qi32(GPR64 dst, s32 imm)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0x81);
- _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7));
- _emitU32((u32)imm);
- }
- void XOR_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0x81);
- _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU32((u32)imm);
- }
- void XOR_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x81);
- _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU32((u32)imm);
- }
- void CMP_di32(GPR32 dst, s32 imm)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0x81);
- _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7));
- _emitU32((u32)imm);
- }
- void CMP_qi32(GPR64 dst, s32 imm)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0x81);
- _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7));
- _emitU32((u32)imm);
- }
- void CMP_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0x81);
- _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU32((u32)imm);
- }
- void CMP_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x81);
- _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU32((u32)imm);
- }
- void ADD_di8(GPR32 dst, s8 imm)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0x83);
- _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7));
- _emitU8((u8)imm);
- }
- void ADD_qi8(GPR64 dst, s8 imm)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0x83);
- _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7));
- _emitU8((u8)imm);
- }
- void ADD_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0x83);
- _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
- void ADD_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x83);
- _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
- void OR_di8(GPR32 dst, s8 imm)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0x83);
- _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7));
- _emitU8((u8)imm);
- }
- void OR_qi8(GPR64 dst, s8 imm)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0x83);
- _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7));
- _emitU8((u8)imm);
- }
- void OR_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0x83);
- _emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
- void OR_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x83);
- _emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
- void ADC_di8(GPR32 dst, s8 imm)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0x83);
- _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7));
- _emitU8((u8)imm);
- }
- void ADC_qi8(GPR64 dst, s8 imm)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0x83);
- _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7));
- _emitU8((u8)imm);
- }
- void ADC_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0x83);
- _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
- void ADC_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x83);
- _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
- void SBB_di8(GPR32 dst, s8 imm)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0x83);
- _emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7));
- _emitU8((u8)imm);
- }
- void SBB_qi8(GPR64 dst, s8 imm)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0x83);
- _emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7));
- _emitU8((u8)imm);
- }
- void SBB_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0x83);
- _emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
- void SBB_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x83);
- _emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
- void AND_di8(GPR32 dst, s8 imm)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0x83);
- _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
- _emitU8((u8)imm);
- }
- void AND_qi8(GPR64 dst, s8 imm)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0x83);
- _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
- _emitU8((u8)imm);
- }
- void AND_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0x83);
- _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
- void AND_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x83);
- _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
- void SUB_di8(GPR32 dst, s8 imm)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0x83);
- _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7));
- _emitU8((u8)imm);
- }
- void SUB_qi8(GPR64 dst, s8 imm)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0x83);
- _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7));
- _emitU8((u8)imm);
- }
- void SUB_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0x83);
- _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
- void SUB_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x83);
- _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
- void XOR_di8(GPR32 dst, s8 imm)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0x83);
- _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7));
- _emitU8((u8)imm);
- }
- void XOR_qi8(GPR64 dst, s8 imm)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0x83);
- _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7));
- _emitU8((u8)imm);
- }
- void XOR_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0x83);
- _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
- void XOR_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x83);
- _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
- void CMP_di8(GPR32 dst, s8 imm)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0x83);
- _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7));
- _emitU8((u8)imm);
- }
- void CMP_qi8(GPR64 dst, s8 imm)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0x83);
- _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7));
- _emitU8((u8)imm);
- }
- void CMP_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0x83);
- _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
- void CMP_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x83);
- _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
- void TEST_bb(GPR8_REX dst, GPR8_REX src)
- {
- if ((src >= 4) || (dst >= 4))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x84);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void TEST_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src >= 4) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x84);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void TEST_dd(GPR32 dst, GPR32 src)
- {
- if (((src & 8) != 0) || ((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x85);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void TEST_qq(GPR64 dst, GPR64 src)
- {
- _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- _emitU8(0x85);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void TEST_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src & 8) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x85);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void TEST_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x85);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void XCHG_bb(GPR8_REX dst, GPR8_REX src)
- {
- if ((dst >= 4) || (src >= 4))
- {
- _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1));
- }
- _emitU8(0x86);
- _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7));
- }
- void XCHG_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst >= 4) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x86);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void XCHG_dd(GPR32 dst, GPR32 src)
- {
- if (((dst & 8) != 0) || ((src & 8) != 0))
- {
- _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1));
- }
- _emitU8(0x87);
- _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7));
- }
- void XCHG_qq(GPR64 dst, GPR64 src)
- {
- _emitU8(0x48 | ((src & 8) >> 3) | ((dst & 8) >> 1));
- _emitU8(0x87);
- _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7));
- }
- void XCHG_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst & 8) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x87);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void XCHG_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x87);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void MOV_bb(GPR8_REX dst, GPR8_REX src)
- {
- if ((src >= 4) || (dst >= 4))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x88);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void MOV_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src >= 4) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x88);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void MOV_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst >= 4) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x8a);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void MOV_dd(GPR32 dst, GPR32 src)
- {
- if (((src & 8) != 0) || ((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x89);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void MOV_qq(GPR64 dst, GPR64 src)
- {
- _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- _emitU8(0x89);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void MOV_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src & 8) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x89);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void MOV_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x89);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void MOV_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst & 8) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x8b);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void MOV_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x8b);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void MOV_di32(GPR32 dst, s32 imm)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0xb8 | ((dst) & 7));
- _emitU32((u32)imm);
- }
- void MOV_qi64(GPR64 dst, s64 imm)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0xb8 | ((dst) & 7));
- _emitU64((u64)imm);
- }
- void CALL_q(GPR64 dst)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0xff);
- _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7));
- }
- void CALL_q_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0xff);
- _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void IMUL_ddi32(GPR32 dst, GPR32 src, s32 imm)
- {
- if (((dst & 8) != 0) || ((src & 8) != 0))
- {
- _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1));
- }
- _emitU8(0x69);
- _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7));
- _emitU32((u32)imm);
- }
- void IMUL_qqi32(GPR64 dst, GPR64 src, s32 imm)
- {
- _emitU8(0x48 | ((src & 8) >> 3) | ((dst & 8) >> 1));
- _emitU8(0x69);
- _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7));
- _emitU32((u32)imm);
- }
- void IMUL_ddi32_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst & 8) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x69);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU32((u32)imm);
- }
- void IMUL_qqi32_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x69);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU32((u32)imm);
- }
- void IMUL_ddi8(GPR32 dst, GPR32 src, s8 imm)
- {
- if (((dst & 8) != 0) || ((src & 8) != 0))
- {
- _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1));
- }
- _emitU8(0x6b);
- _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7));
- _emitU8((u8)imm);
- }
- void IMUL_qqi8(GPR64 dst, GPR64 src, s8 imm)
- {
- _emitU8(0x48 | ((src & 8) >> 3) | ((dst & 8) >> 1));
- _emitU8(0x6b);
- _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7));
- _emitU8((u8)imm);
- }
- void IMUL_ddi8_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((dst & 8) || (memReg & 8))
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x6b);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
- void IMUL_qqi8_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x6b);
- _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
- void SHL_b_CL(GPR8_REX dst)
- {
- if ((dst >= 4))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0xd2);
- _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
- }
- void SHL_b_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0xd2);
- _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SHR_b_CL(GPR8_REX dst)
- {
- if ((dst >= 4))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0xd2);
- _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7));
- }
- void SHR_b_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0xd2);
- _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SAR_b_CL(GPR8_REX dst)
- {
- if ((dst >= 4))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0xd2);
- _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7));
- }
- void SAR_b_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0xd2);
- _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SHL_d_CL(GPR32 dst)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0xd3);
- _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
- }
- void SHL_q_CL(GPR64 dst)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0xd3);
- _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
- }
- void SHL_d_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0xd3);
- _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SHL_q_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0xd3);
- _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SHR_d_CL(GPR32 dst)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0xd3);
- _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7));
- }
- void SHR_q_CL(GPR64 dst)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0xd3);
- _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7));
- }
- void SHR_d_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0xd3);
- _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SHR_q_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0xd3);
- _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SAR_d_CL(GPR32 dst)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0xd3);
- _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7));
- }
- void SAR_q_CL(GPR64 dst)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0xd3);
- _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7));
- }
- void SAR_d_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0xd3);
- _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void SAR_q_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0xd3);
- _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void JMP_j32(s32 imm)
- {
- _emitU8(0xe9);
- _emitU32((u32)imm);
- }
- void Jcc_j32(X86Cond cond, s32 imm)
- {
- _emitU8(0x0f);
- _emitU8(0x80 | (u8)cond);
- _emitU32((u32)imm);
- }
- void SETcc_b(X86Cond cond, GPR8_REX dst)
- {
- if ((dst >= 4))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0x0f);
- _emitU8(0x90 | (u8)cond);
- _emitU8((3 << 6) | (dst & 7));
- }
- void SETcc_b_l(X86Cond cond, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0x0f);
- _emitU8(0x90);
- _emitU8((mod << 6) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void CMPXCHG_dd(GPR32 dst, GPR32 src)
- {
- if (((src & 8) != 0) || ((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- }
- _emitU8(0x0f);
- _emitU8(0xb1);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void CMPXCHG_qq(GPR64 dst, GPR64 src)
- {
- _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1));
- _emitU8(0x0f);
- _emitU8(0xb1);
- _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7));
- }
- void CMPXCHG_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((src & 8) || (memReg & 8))
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1));
- }
- _emitU8(0x0f);
- _emitU8(0xb1);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void CMPXCHG_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x0f);
- _emitU8(0xb1);
- _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- }
- void BSWAP_d(GPR32 dst)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0x0f);
- _emitU8(0xc8 | ((dst) & 7));
- }
- void BSWAP_q(GPR64 dst)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0x0f);
- _emitU8(0xc8 | ((dst) & 7));
- }
- void BT_du8(GPR32 dst, u8 imm)
- {
- if (((dst & 8) != 0))
- {
- _emitU8(0x40 | ((dst & 8) >> 3));
- }
- _emitU8(0x0f);
- _emitU8(0xba);
- _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
- _emitU8((u8)imm);
- }
- void BT_qu8(GPR64 dst, u8 imm)
- {
- _emitU8(0x48 | ((dst & 8) >> 3));
- _emitU8(0x0f);
- _emitU8(0xba);
- _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
- _emitU8((u8)imm);
- }
- void BT_du8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, u8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
- }
- else
- {
- if ((memReg & 8))
- _emitU8(0x40 | ((memReg & 8) >> 1));
- }
- _emitU8(0x0f);
- _emitU8(0xba);
- _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
- void BT_qu8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, u8 imm)
- {
- uint8 mod;
- if (offset == 0 && (memReg & 7) != 5) mod = 0;
- else if (offset == (s32)(s8)offset) mod = 1;
- else mod = 2;
- bool sib_use = (scaler != 0 && index != X86_REG_NONE);
- if ((memReg & 7) == 4)
- {
- cemu_assert_debug(index == X86_REG_NONE);
- index = memReg;
- sib_use = true;
- }
- if (sib_use)
- {
- _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
- }
- else
- {
- _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08);
- }
- _emitU8(0x0f);
- _emitU8(0xba);
- _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
- if (sib_use)
- {
- _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
- }
- if (mod == 1) _emitU8((u8)offset);
- else if (mod == 2) _emitU32((u32)offset);
- _emitU8((u8)imm);
- }
-};
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h
deleted file mode 100644
index bc0c27c5..00000000
--- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#pragma once
-
-#include "IMLInstruction.h"
-#include "IMLSegment.h"
-
-// optimizer passes
-void IMLOptimizer_OptimizeDirectFloatCopies(struct ppcImlGenContext_t* ppcImlGenContext);
-void IMLOptimizer_OptimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcImlGenContext);
-void PPCRecompiler_optimizePSQLoadAndStore(struct ppcImlGenContext_t* ppcImlGenContext);
-
-void IMLOptimizer_StandardOptimizationPass(ppcImlGenContext_t& ppcImlGenContext);
-
-// debug
-void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& disassemblyLineOut);
-void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo = false);
-void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo = false);
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp
deleted file mode 100644
index 6ae4b591..00000000
--- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp
+++ /dev/null
@@ -1,5 +0,0 @@
-#include "IML.h"
-//#include "PPCRecompilerIml.h"
-#include "util/helpers/fixedSizeList.h"
-
-#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp
deleted file mode 100644
index cd269869..00000000
--- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp
+++ /dev/null
@@ -1,561 +0,0 @@
-#include "IML.h"
-#include "IMLInstruction.h"
-#include "IMLSegment.h"
-#include "IMLRegisterAllocatorRanges.h"
-#include "util/helpers/StringBuf.h"
-
-#include "../PPCRecompiler.h"
-
-const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml)
-{
- static char _tempOpcodename[32];
- uint32 op = iml->operation;
- if (op == PPCREC_IML_OP_ASSIGN)
- return "MOV";
- else if (op == PPCREC_IML_OP_ADD)
- return "ADD";
- else if (op == PPCREC_IML_OP_ADD_WITH_CARRY)
- return "ADC";
- else if (op == PPCREC_IML_OP_SUB)
- return "SUB";
- else if (op == PPCREC_IML_OP_OR)
- return "OR";
- else if (op == PPCREC_IML_OP_AND)
- return "AND";
- else if (op == PPCREC_IML_OP_XOR)
- return "XOR";
- else if (op == PPCREC_IML_OP_LEFT_SHIFT)
- return "LSH";
- else if (op == PPCREC_IML_OP_RIGHT_SHIFT_U)
- return "RSH";
- else if (op == PPCREC_IML_OP_RIGHT_SHIFT_S)
- return "ARSH";
- else if (op == PPCREC_IML_OP_LEFT_ROTATE)
- return "LROT";
- else if (op == PPCREC_IML_OP_MULTIPLY_SIGNED)
- return "MULS";
- else if (op == PPCREC_IML_OP_DIVIDE_SIGNED)
- return "DIVS";
- else if (op == PPCREC_IML_OP_FPR_ASSIGN)
- return "FMOV";
- else if (op == PPCREC_IML_OP_FPR_ADD)
- return "FADD";
- else if (op == PPCREC_IML_OP_FPR_SUB)
- return "FSUB";
- else if (op == PPCREC_IML_OP_FPR_MULTIPLY)
- return "FMUL";
- else if (op == PPCREC_IML_OP_FPR_DIVIDE)
- return "FDIV";
- else if (op == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64)
- return "F32TOF64";
- else if (op == PPCREC_IML_OP_FPR_ABS)
- return "FABS";
- else if (op == PPCREC_IML_OP_FPR_NEGATE)
- return "FNEG";
- else if (op == PPCREC_IML_OP_FPR_NEGATIVE_ABS)
- return "FNABS";
- else if (op == PPCREC_IML_OP_FPR_FLOAT_TO_INT)
- return "F2I";
- else if (op == PPCREC_IML_OP_FPR_INT_TO_FLOAT)
- return "I2F";
- else if (op == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
- return "BITMOVE";
-
- sprintf(_tempOpcodename, "OP0%02x_T%d", iml->operation, iml->type);
- return _tempOpcodename;
-}
-
-std::string IMLDebug_GetRegName(IMLReg r)
-{
- std::string regName;
- uint32 regId = r.GetRegID();
- switch (r.GetRegFormat())
- {
- case IMLRegFormat::F32:
- regName.append("f");
- break;
- case IMLRegFormat::F64:
- regName.append("fd");
- break;
- case IMLRegFormat::I32:
- regName.append("i");
- break;
- case IMLRegFormat::I64:
- regName.append("r");
- break;
- default:
- DEBUG_BREAK;
- }
- regName.append(fmt::format("{}", regId));
- return regName;
-}
-
-void IMLDebug_AppendRegisterParam(StringBuf& strOutput, IMLReg virtualRegister, bool isLast = false)
-{
- strOutput.add(IMLDebug_GetRegName(virtualRegister));
- if (!isLast)
- strOutput.add(", ");
-}
-
-void IMLDebug_AppendS32Param(StringBuf& strOutput, sint32 val, bool isLast = false)
-{
- if (val < 0)
- {
- strOutput.add("-");
- val = -val;
- }
- strOutput.addFmt("0x{:08x}", val);
- if (!isLast)
- strOutput.add(", ");
-}
-
-void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* imlSegment, sint32 offset)
-{
- // pad to 70 characters
- sint32 index = currentLineText.getLen();
- while (index < 70)
- {
- currentLineText.add(" ");
- index++;
- }
- raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
- while (subrangeItr)
- {
- if (subrangeItr->interval.start.GetInstructionIndexEx() == offset)
- {
- if(subrangeItr->interval.start.IsInstructionIndex() && !subrangeItr->interval.start.IsOnInputEdge())
- currentLineText.add(".");
- else
- currentLineText.add("|");
-
- currentLineText.addFmt("{:<4}", subrangeItr->GetVirtualRegister());
- }
- else if (subrangeItr->interval.end.GetInstructionIndexEx() == offset)
- {
- if(subrangeItr->interval.end.IsInstructionIndex() && !subrangeItr->interval.end.IsOnOutputEdge())
- currentLineText.add("* ");
- else
- currentLineText.add("| ");
- }
- else if (subrangeItr->interval.ContainsInstructionIndexEx(offset))
- {
- currentLineText.add("| ");
- }
- else
- {
- currentLineText.add(" ");
- }
- index += 5;
- // next
- subrangeItr = subrangeItr->link_allSegmentRanges.next;
- }
-}
-
-std::string IMLDebug_GetSegmentName(ppcImlGenContext_t* ctx, IMLSegment* seg)
-{
- if (!ctx)
- {
- return "";
- }
- // find segment index
- for (size_t i = 0; i < ctx->segmentList2.size(); i++)
- {
- if (ctx->segmentList2[i] == seg)
- {
- return fmt::format("Seg{:04x}", i);
- }
- }
- return "";
-}
-
-std::string IMLDebug_GetConditionName(IMLCondition cond)
-{
- switch (cond)
- {
- case IMLCondition::EQ:
- return "EQ";
- case IMLCondition::NEQ:
- return "NEQ";
- case IMLCondition::UNSIGNED_GT:
- return "UGT";
- case IMLCondition::UNSIGNED_LT:
- return "ULT";
- case IMLCondition::SIGNED_GT:
- return "SGT";
- case IMLCondition::SIGNED_LT:
- return "SLT";
- default:
- cemu_assert_unimplemented();
- }
- return "ukn";
-}
-
-void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& disassemblyLineOut)
-{
- const sint32 lineOffsetParameters = 10;//18;
-
- StringBuf strOutput(1024);
- strOutput.reset();
- if (inst.type == PPCREC_IML_TYPE_R_NAME || inst.type == PPCREC_IML_TYPE_NAME_R)
- {
- if (inst.type == PPCREC_IML_TYPE_R_NAME)
- strOutput.add("R_NAME");
- else
- strOutput.add("NAME_R");
- while ((sint32)strOutput.getLen() < lineOffsetParameters)
- strOutput.add(" ");
-
- if(inst.type == PPCREC_IML_TYPE_R_NAME)
- IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR);
-
- strOutput.add("name_");
- if (inst.op_r_name.name >= PPCREC_NAME_R0 && inst.op_r_name.name < (PPCREC_NAME_R0 + 999))
- {
- strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0);
- }
- if (inst.op_r_name.name >= PPCREC_NAME_FPR_HALF && inst.op_r_name.name < (PPCREC_NAME_FPR_HALF + 32*2))
- {
- strOutput.addFmt("f{}", inst.op_r_name.name - ((PPCREC_NAME_FPR_HALF - inst.op_r_name.name)/2));
- if ((inst.op_r_name.name-PPCREC_NAME_FPR_HALF)&1)
- strOutput.add(".ps1");
- else
- strOutput.add(".ps0");
- }
- else if (inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0 + 999))
- {
- strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0);
- }
- else if (inst.op_r_name.name >= PPCREC_NAME_CR && inst.op_r_name.name <= PPCREC_NAME_CR_LAST)
- strOutput.addFmt("cr{}", inst.op_r_name.name - PPCREC_NAME_CR);
- else if (inst.op_r_name.name == PPCREC_NAME_XER_CA)
- strOutput.add("xer.ca");
- else if (inst.op_r_name.name == PPCREC_NAME_XER_SO)
- strOutput.add("xer.so");
- else if (inst.op_r_name.name == PPCREC_NAME_XER_OV)
- strOutput.add("xer.ov");
- else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_EA)
- strOutput.add("cpuReservation.ea");
- else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_VAL)
- strOutput.add("cpuReservation.value");
- else
- {
- strOutput.addFmt("name_ukn{}", inst.op_r_name.name);
- }
- if (inst.type != PPCREC_IML_TYPE_R_NAME)
- {
- strOutput.add(", ");
- IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR, true);
- }
-
- }
- else if (inst.type == PPCREC_IML_TYPE_R_R)
- {
- strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
- while ((sint32)strOutput.getLen() < lineOffsetParameters)
- strOutput.add(" ");
- IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regR);
- IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regA, true);
- }
- else if (inst.type == PPCREC_IML_TYPE_R_R_R)
- {
- strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
- while ((sint32)strOutput.getLen() < lineOffsetParameters)
- strOutput.add(" ");
- IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regR);
- IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regA);
- IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regB, true);
- }
- else if (inst.type == PPCREC_IML_TYPE_R_R_R_CARRY)
- {
- strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
- while ((sint32)strOutput.getLen() < lineOffsetParameters)
- strOutput.add(" ");
- IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regR);
- IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regA);
- IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regB);
- IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regCarry, true);
- }
- else if (inst.type == PPCREC_IML_TYPE_COMPARE)
- {
- strOutput.add("CMP ");
- while ((sint32)strOutput.getLen() < lineOffsetParameters)
- strOutput.add(" ");
- IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regA);
- IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regB);
- strOutput.addFmt("{}", IMLDebug_GetConditionName(inst.op_compare.cond));
- strOutput.add(" -> ");
- IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regR, true);
- }
- else if (inst.type == PPCREC_IML_TYPE_COMPARE_S32)
- {
- strOutput.add("CMP ");
- while ((sint32)strOutput.getLen() < lineOffsetParameters)
- strOutput.add(" ");
- IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regA);
- strOutput.addFmt("{}", inst.op_compare_s32.immS32);
- strOutput.addFmt(", {}", IMLDebug_GetConditionName(inst.op_compare_s32.cond));
- strOutput.add(" -> ");
- IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regR, true);
- }
- else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
- {
- strOutput.add("CJUMP ");
- while ((sint32)strOutput.getLen() < lineOffsetParameters)
- strOutput.add(" ");
- IMLDebug_AppendRegisterParam(strOutput, inst.op_conditional_jump.registerBool, true);
- if (!inst.op_conditional_jump.mustBeTrue)
- strOutput.add("(inverted)");
- }
- else if (inst.type == PPCREC_IML_TYPE_JUMP)
- {
- strOutput.add("JUMP");
- }
- else if (inst.type == PPCREC_IML_TYPE_R_R_S32)
- {
- strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
- while ((sint32)strOutput.getLen() < lineOffsetParameters)
- strOutput.add(" ");
-
- IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regR);
- IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regA);
- IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32.immS32, true);
- }
- else if (inst.type == PPCREC_IML_TYPE_R_R_S32_CARRY)
- {
- strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
- while ((sint32)strOutput.getLen() < lineOffsetParameters)
- strOutput.add(" ");
-
- IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regR);
- IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regA);
- IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32_carry.immS32);
- IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regCarry, true);
- }
- else if (inst.type == PPCREC_IML_TYPE_R_S32)
- {
- strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
- while ((sint32)strOutput.getLen() < lineOffsetParameters)
- strOutput.add(" ");
-
- IMLDebug_AppendRegisterParam(strOutput, inst.op_r_immS32.regR);
- IMLDebug_AppendS32Param(strOutput, inst.op_r_immS32.immS32, true);
- }
- else if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE ||
- inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED)
- {
- if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED)
- strOutput.add("LD_");
- else
- strOutput.add("ST_");
-
- if (inst.op_storeLoad.flags2.signExtend)
- strOutput.add("S");
- else
- strOutput.add("U");
- strOutput.addFmt("{}", inst.op_storeLoad.copyWidth);
-
- while ((sint32)strOutput.getLen() < lineOffsetParameters)
- strOutput.add(" ");
-
- IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData);
-
- if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED)
- strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), IMLDebug_GetRegName(inst.op_storeLoad.registerMem2));
- else
- strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32);
- }
- else if (inst.type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
- {
- strOutput.add("ATOMIC_ST_U32");
-
- while ((sint32)strOutput.getLen() < lineOffsetParameters)
- strOutput.add(" ");
-
- IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regEA);
- IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regCompareValue);
- IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regWriteValue);
- IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regBoolOut, true);
- }
- else if (inst.type == PPCREC_IML_TYPE_NO_OP)
- {
- strOutput.add("NOP");
- }
- else if (inst.type == PPCREC_IML_TYPE_MACRO)
- {
- if (inst.operation == PPCREC_IML_MACRO_B_TO_REG)
- {
- strOutput.addFmt("MACRO B_TO_REG {}", IMLDebug_GetRegName(inst.op_macro.paramReg));
- }
- else if (inst.operation == PPCREC_IML_MACRO_BL)
- {
- strOutput.addFmt("MACRO BL 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16);
- }
- else if (inst.operation == PPCREC_IML_MACRO_B_FAR)
- {
- strOutput.addFmt("MACRO B_FAR 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16);
- }
- else if (inst.operation == PPCREC_IML_MACRO_LEAVE)
- {
- strOutput.addFmt("MACRO LEAVE ppc: 0x{:08x}", inst.op_macro.param);
- }
- else if (inst.operation == PPCREC_IML_MACRO_HLE)
- {
- strOutput.addFmt("MACRO HLE ppcAddr: 0x{:08x} funcId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2);
- }
- else if (inst.operation == PPCREC_IML_MACRO_COUNT_CYCLES)
- {
- strOutput.addFmt("MACRO COUNT_CYCLES cycles: {}", inst.op_macro.param);
- }
- else
- {
- strOutput.addFmt("MACRO ukn operation {}", inst.operation);
- }
- }
- else if (inst.type == PPCREC_IML_TYPE_FPR_LOAD)
- {
- strOutput.addFmt("{} = ", IMLDebug_GetRegName(inst.op_storeLoad.registerData));
- if (inst.op_storeLoad.flags2.signExtend)
- strOutput.add("S");
- else
- strOutput.add("U");
- strOutput.addFmt("{} [{}+{}] mode {}", inst.op_storeLoad.copyWidth / 8, IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32, inst.op_storeLoad.mode);
- if (inst.op_storeLoad.flags2.notExpanded)
- {
- strOutput.addFmt(" ");
- }
- }
- else if (inst.type == PPCREC_IML_TYPE_FPR_STORE)
- {
- if (inst.op_storeLoad.flags2.signExtend)
- strOutput.add("S");
- else
- strOutput.add("U");
- strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.immS32);
- strOutput.addFmt(" = {} mode {}", IMLDebug_GetRegName(inst.op_storeLoad.registerData), inst.op_storeLoad.mode);
- }
- else if (inst.type == PPCREC_IML_TYPE_FPR_R)
- {
- strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
- strOutput.addFmt("{}", IMLDebug_GetRegName(inst.op_fpr_r.regR));
- }
- else if (inst.type == PPCREC_IML_TYPE_FPR_R_R)
- {
- strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
- strOutput.addFmt("{}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r.regA));
- }
- else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R)
- {
- strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
- strOutput.addFmt("{}, {}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regB), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regC));
- }
- else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R)
- {
- strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
- strOutput.addFmt("{}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regB));
- }
- else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
- {
- strOutput.addFmt("CYCLE_CHECK");
- }
- else if (inst.type == PPCREC_IML_TYPE_X86_EFLAGS_JCC)
- {
- strOutput.addFmt("X86_JCC {}", IMLDebug_GetConditionName(inst.op_x86_eflags_jcc.cond));
- }
- else
- {
- strOutput.addFmt("Unknown iml type {}", inst.type);
- }
- disassemblyLineOut.assign(strOutput.c_str());
-}
-
-void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo)
-{
- StringBuf strOutput(4096);
-
- strOutput.addFmt("SEGMENT {} | PPC=0x{:08x} Loop-depth {}", IMLDebug_GetSegmentName(ctx, imlSegment), imlSegment->ppcAddress, imlSegment->loopDepth);
- if (imlSegment->isEnterable)
- {
- strOutput.addFmt(" ENTERABLE (0x{:08x})", imlSegment->enterPPCAddress);
- }
- if (imlSegment->deadCodeEliminationHintSeg)
- {
- strOutput.addFmt(" InheritOverwrite: {}", IMLDebug_GetSegmentName(ctx, imlSegment->deadCodeEliminationHintSeg));
- }
- cemuLog_log(LogType::Force, "{}", strOutput.c_str());
-
- if (printLivenessRangeInfo)
- {
- strOutput.reset();
- IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START);
- cemuLog_log(LogType::Force, "{}", strOutput.c_str());
- }
- //debug_printf("\n");
- strOutput.reset();
-
- std::string disassemblyLine;
- for (sint32 i = 0; i < imlSegment->imlList.size(); i++)
- {
- const IMLInstruction& inst = imlSegment->imlList[i];
- // don't log NOP instructions
- if (inst.type == PPCREC_IML_TYPE_NO_OP)
- continue;
- strOutput.reset();
- strOutput.addFmt("{:02x} ", i);
- //cemuLog_log(LogType::Force, "{:02x} ", i);
- disassemblyLine.clear();
- IMLDebug_DisassembleInstruction(inst, disassemblyLine);
- strOutput.add(disassemblyLine);
- if (printLivenessRangeInfo)
- {
- IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, i);
- }
- cemuLog_log(LogType::Force, "{}", strOutput.c_str());
- }
- // all ranges
- if (printLivenessRangeInfo)
- {
- strOutput.reset();
- strOutput.add("Ranges-VirtReg ");
- raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
- while (subrangeItr)
- {
- strOutput.addFmt("v{:<4}", (uint32)subrangeItr->GetVirtualRegister());
- subrangeItr = subrangeItr->link_allSegmentRanges.next;
- }
- cemuLog_log(LogType::Force, "{}", strOutput.c_str());
- strOutput.reset();
- strOutput.add("Ranges-PhysReg ");
- subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
- while (subrangeItr)
- {
- strOutput.addFmt("p{:<4}", subrangeItr->GetPhysicalRegister());
- subrangeItr = subrangeItr->link_allSegmentRanges.next;
- }
- cemuLog_log(LogType::Force, "{}", strOutput.c_str());
- }
- // branch info
- strOutput.reset();
- strOutput.add("Links from: ");
- for (sint32 i = 0; i < imlSegment->list_prevSegments.size(); i++)
- {
- if (i)
- strOutput.add(", ");
- strOutput.addFmt("{}", IMLDebug_GetSegmentName(ctx, imlSegment->list_prevSegments[i]).c_str());
- }
- cemuLog_log(LogType::Force, "{}", strOutput.c_str());
- if (imlSegment->nextSegmentBranchNotTaken)
- cemuLog_log(LogType::Force, "BranchNotTaken: {}", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchNotTaken).c_str());
- if (imlSegment->nextSegmentBranchTaken)
- cemuLog_log(LogType::Force, "BranchTaken: {}", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchTaken).c_str());
- if (imlSegment->nextSegmentIsUncertain)
- cemuLog_log(LogType::Force, "Dynamic target");
-}
-
-void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo)
-{
- for (size_t i = 0; i < ppcImlGenContext->segmentList2.size(); i++)
- {
- IMLDebug_DumpSegment(ppcImlGenContext, ppcImlGenContext->segmentList2[i], printLivenessRangeInfo);
- cemuLog_log(LogType::Force, "");
- }
-}
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp
deleted file mode 100644
index 997de4e9..00000000
--- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp
+++ /dev/null
@@ -1,536 +0,0 @@
-#include "IMLInstruction.h"
-#include "IML.h"
-
-#include "../PPCRecompiler.h"
-#include "../PPCRecompilerIml.h"
-
-// return true if an instruction has side effects on top of just reading and writing registers
-bool IMLInstruction::HasSideEffects() const
-{
- bool hasSideEffects = true;
- if(type == PPCREC_IML_TYPE_R_R || type == PPCREC_IML_TYPE_R_R_S32 || type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32)
- hasSideEffects = false;
- // todo - add more cases
- return hasSideEffects;
-}
-
-void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
-{
- registersUsed->readGPR1 = IMLREG_INVALID;
- registersUsed->readGPR2 = IMLREG_INVALID;
- registersUsed->readGPR3 = IMLREG_INVALID;
- registersUsed->readGPR4 = IMLREG_INVALID;
- registersUsed->writtenGPR1 = IMLREG_INVALID;
- registersUsed->writtenGPR2 = IMLREG_INVALID;
- if (type == PPCREC_IML_TYPE_R_NAME)
- {
- registersUsed->writtenGPR1 = op_r_name.regR;
- }
- else if (type == PPCREC_IML_TYPE_NAME_R)
- {
- registersUsed->readGPR1 = op_r_name.regR;
- }
- else if (type == PPCREC_IML_TYPE_R_R)
- {
- if (operation == PPCREC_IML_OP_X86_CMP)
- {
- // both operands are read only
- registersUsed->readGPR1 = op_r_r.regR;
- registersUsed->readGPR2 = op_r_r.regA;
- }
- else if (
- operation == PPCREC_IML_OP_ASSIGN ||
- operation == PPCREC_IML_OP_ENDIAN_SWAP ||
- operation == PPCREC_IML_OP_CNTLZW ||
- operation == PPCREC_IML_OP_NOT ||
- operation == PPCREC_IML_OP_NEG ||
- operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32 ||
- operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32)
- {
- // result is written, operand is read
- registersUsed->writtenGPR1 = op_r_r.regR;
- registersUsed->readGPR1 = op_r_r.regA;
- }
- else
- cemu_assert_unimplemented();
- }
- else if (type == PPCREC_IML_TYPE_R_S32)
- {
- cemu_assert_debug(operation != PPCREC_IML_OP_ADD &&
- operation != PPCREC_IML_OP_SUB &&
- operation != PPCREC_IML_OP_AND &&
- operation != PPCREC_IML_OP_OR &&
- operation != PPCREC_IML_OP_XOR); // deprecated, use r_r_s32 for these
-
- if (operation == PPCREC_IML_OP_LEFT_ROTATE)
- {
- // register operand is read and write
- registersUsed->readGPR1 = op_r_immS32.regR;
- registersUsed->writtenGPR1 = op_r_immS32.regR;
- }
- else if (operation == PPCREC_IML_OP_X86_CMP)
- {
- // register operand is read only
- registersUsed->readGPR1 = op_r_immS32.regR;
- }
- else
- {
- // register operand is write only
- // todo - use explicit lists, avoid default cases
- registersUsed->writtenGPR1 = op_r_immS32.regR;
- }
- }
- else if (type == PPCREC_IML_TYPE_R_R_S32)
- {
- registersUsed->writtenGPR1 = op_r_r_s32.regR;
- registersUsed->readGPR1 = op_r_r_s32.regA;
- }
- else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY)
- {
- registersUsed->writtenGPR1 = op_r_r_s32_carry.regR;
- registersUsed->readGPR1 = op_r_r_s32_carry.regA;
- // some operations read carry
- switch (operation)
- {
- case PPCREC_IML_OP_ADD_WITH_CARRY:
- registersUsed->readGPR2 = op_r_r_s32_carry.regCarry;
- break;
- case PPCREC_IML_OP_ADD:
- break;
- default:
- cemu_assert_unimplemented();
- }
- // carry is always written
- registersUsed->writtenGPR2 = op_r_r_s32_carry.regCarry;
- }
- else if (type == PPCREC_IML_TYPE_R_R_R)
- {
- // in all cases result is written and other operands are read only
- // with the exception of XOR, where if regA == regB then all bits are zeroed out. So we don't consider it a read
- registersUsed->writtenGPR1 = op_r_r_r.regR;
- if(!(operation == PPCREC_IML_OP_XOR && op_r_r_r.regA == op_r_r_r.regB))
- {
- registersUsed->readGPR1 = op_r_r_r.regA;
- registersUsed->readGPR2 = op_r_r_r.regB;
- }
- }
- else if (type == PPCREC_IML_TYPE_R_R_R_CARRY)
- {
- registersUsed->writtenGPR1 = op_r_r_r_carry.regR;
- registersUsed->readGPR1 = op_r_r_r_carry.regA;
- registersUsed->readGPR2 = op_r_r_r_carry.regB;
- // some operations read carry
- switch (operation)
- {
- case PPCREC_IML_OP_ADD_WITH_CARRY:
- registersUsed->readGPR3 = op_r_r_r_carry.regCarry;
- break;
- case PPCREC_IML_OP_ADD:
- break;
- default:
- cemu_assert_unimplemented();
- }
- // carry is always written
- registersUsed->writtenGPR2 = op_r_r_r_carry.regCarry;
- }
- else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
- {
- // no effect on registers
- }
- else if (type == PPCREC_IML_TYPE_NO_OP)
- {
- // no effect on registers
- }
- else if (type == PPCREC_IML_TYPE_MACRO)
- {
- if (operation == PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_COUNT_CYCLES || operation == PPCREC_IML_MACRO_HLE)
- {
- // no effect on registers
- }
- else if (operation == PPCREC_IML_MACRO_B_TO_REG)
- {
- cemu_assert_debug(op_macro.paramReg.IsValid());
- registersUsed->readGPR1 = op_macro.paramReg;
- }
- else
- cemu_assert_unimplemented();
- }
- else if (type == PPCREC_IML_TYPE_COMPARE)
- {
- registersUsed->readGPR1 = op_compare.regA;
- registersUsed->readGPR2 = op_compare.regB;
- registersUsed->writtenGPR1 = op_compare.regR;
- }
- else if (type == PPCREC_IML_TYPE_COMPARE_S32)
- {
- registersUsed->readGPR1 = op_compare_s32.regA;
- registersUsed->writtenGPR1 = op_compare_s32.regR;
- }
- else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
- {
- registersUsed->readGPR1 = op_conditional_jump.registerBool;
- }
- else if (type == PPCREC_IML_TYPE_JUMP)
- {
- // no registers affected
- }
- else if (type == PPCREC_IML_TYPE_LOAD)
- {
- registersUsed->writtenGPR1 = op_storeLoad.registerData;
- if (op_storeLoad.registerMem.IsValid())
- registersUsed->readGPR1 = op_storeLoad.registerMem;
- }
- else if (type == PPCREC_IML_TYPE_LOAD_INDEXED)
- {
- registersUsed->writtenGPR1 = op_storeLoad.registerData;
- if (op_storeLoad.registerMem.IsValid())
- registersUsed->readGPR1 = op_storeLoad.registerMem;
- if (op_storeLoad.registerMem2.IsValid())
- registersUsed->readGPR2 = op_storeLoad.registerMem2;
- }
- else if (type == PPCREC_IML_TYPE_STORE)
- {
- registersUsed->readGPR1 = op_storeLoad.registerData;
- if (op_storeLoad.registerMem.IsValid())
- registersUsed->readGPR2 = op_storeLoad.registerMem;
- }
- else if (type == PPCREC_IML_TYPE_STORE_INDEXED)
- {
- registersUsed->readGPR1 = op_storeLoad.registerData;
- if (op_storeLoad.registerMem.IsValid())
- registersUsed->readGPR2 = op_storeLoad.registerMem;
- if (op_storeLoad.registerMem2.IsValid())
- registersUsed->readGPR3 = op_storeLoad.registerMem2;
- }
- else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
- {
- registersUsed->readGPR1 = op_atomic_compare_store.regEA;
- registersUsed->readGPR2 = op_atomic_compare_store.regCompareValue;
- registersUsed->readGPR3 = op_atomic_compare_store.regWriteValue;
- registersUsed->writtenGPR1 = op_atomic_compare_store.regBoolOut;
- }
- else if (type == PPCREC_IML_TYPE_CALL_IMM)
- {
- if (op_call_imm.regParam0.IsValid())
- registersUsed->readGPR1 = op_call_imm.regParam0;
- if (op_call_imm.regParam1.IsValid())
- registersUsed->readGPR2 = op_call_imm.regParam1;
- if (op_call_imm.regParam2.IsValid())
- registersUsed->readGPR3 = op_call_imm.regParam2;
- registersUsed->writtenGPR1 = op_call_imm.regReturn;
- }
- else if (type == PPCREC_IML_TYPE_FPR_LOAD)
- {
- // fpr load operation
- registersUsed->writtenGPR1 = op_storeLoad.registerData;
- // address is in gpr register
- if (op_storeLoad.registerMem.IsValid())
- registersUsed->readGPR1 = op_storeLoad.registerMem;
- }
- else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
- {
- // fpr load operation
- registersUsed->writtenGPR1 = op_storeLoad.registerData;
- // address is in gpr registers
- if (op_storeLoad.registerMem.IsValid())
- registersUsed->readGPR1 = op_storeLoad.registerMem;
- if (op_storeLoad.registerMem2.IsValid())
- registersUsed->readGPR2 = op_storeLoad.registerMem2;
- }
- else if (type == PPCREC_IML_TYPE_FPR_STORE)
- {
- // fpr store operation
- registersUsed->readGPR1 = op_storeLoad.registerData;
- if (op_storeLoad.registerMem.IsValid())
- registersUsed->readGPR2 = op_storeLoad.registerMem;
- }
- else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
- {
- // fpr store operation
- registersUsed->readGPR1 = op_storeLoad.registerData;
- // address is in gpr registers
- if (op_storeLoad.registerMem.IsValid())
- registersUsed->readGPR2 = op_storeLoad.registerMem;
- if (op_storeLoad.registerMem2.IsValid())
- registersUsed->readGPR3 = op_storeLoad.registerMem2;
- }
- else if (type == PPCREC_IML_TYPE_FPR_R_R)
- {
- // fpr operation
- if (
- operation == PPCREC_IML_OP_FPR_ASSIGN ||
- operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64 ||
- operation == PPCREC_IML_OP_FPR_FCTIWZ
- )
- {
- registersUsed->readGPR1 = op_fpr_r_r.regA;
- registersUsed->writtenGPR1 = op_fpr_r_r.regR;
- }
- else if (operation == PPCREC_IML_OP_FPR_MULTIPLY ||
- operation == PPCREC_IML_OP_FPR_DIVIDE ||
- operation == PPCREC_IML_OP_FPR_ADD ||
- operation == PPCREC_IML_OP_FPR_SUB)
- {
- registersUsed->readGPR1 = op_fpr_r_r.regA;
- registersUsed->readGPR2 = op_fpr_r_r.regR;
- registersUsed->writtenGPR1 = op_fpr_r_r.regR;
-
- }
- else if (operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT ||
- operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT ||
- operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
- {
- registersUsed->writtenGPR1 = op_fpr_r_r.regR;
- registersUsed->readGPR1 = op_fpr_r_r.regA;
- }
- else
- cemu_assert_unimplemented();
- }
- else if (type == PPCREC_IML_TYPE_FPR_R_R_R)
- {
- // fpr operation
- registersUsed->readGPR1 = op_fpr_r_r_r.regA;
- registersUsed->readGPR2 = op_fpr_r_r_r.regB;
- registersUsed->writtenGPR1 = op_fpr_r_r_r.regR;
- }
- else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R)
- {
- // fpr operation
- registersUsed->readGPR1 = op_fpr_r_r_r_r.regA;
- registersUsed->readGPR2 = op_fpr_r_r_r_r.regB;
- registersUsed->readGPR3 = op_fpr_r_r_r_r.regC;
- registersUsed->writtenGPR1 = op_fpr_r_r_r_r.regR;
- }
- else if (type == PPCREC_IML_TYPE_FPR_R)
- {
- // fpr operation
- if (operation == PPCREC_IML_OP_FPR_NEGATE ||
- operation == PPCREC_IML_OP_FPR_ABS ||
- operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS ||
- operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64 ||
- operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM)
- {
- registersUsed->readGPR1 = op_fpr_r.regR;
- registersUsed->writtenGPR1 = op_fpr_r.regR;
- }
- else if (operation == PPCREC_IML_OP_FPR_LOAD_ONE)
- {
- registersUsed->writtenGPR1 = op_fpr_r.regR;
- }
- else
- cemu_assert_unimplemented();
- }
- else if (type == PPCREC_IML_TYPE_FPR_COMPARE)
- {
- registersUsed->writtenGPR1 = op_fpr_compare.regR;
- registersUsed->readGPR1 = op_fpr_compare.regA;
- registersUsed->readGPR2 = op_fpr_compare.regB;
- }
- else if (type == PPCREC_IML_TYPE_X86_EFLAGS_JCC)
- {
- // no registers read or written (except for the implicit eflags)
- }
- else
- {
- cemu_assert_unimplemented();
- }
-}
-
-IMLReg replaceRegisterIdMultiple(IMLReg reg, const std::unordered_map& translationTable)
-{
- if (reg.IsInvalid())
- return reg;
- const auto& it = translationTable.find(reg.GetRegID());
- cemu_assert_debug(it != translationTable.cend());
- IMLReg alteredReg = reg;
- alteredReg.SetRegID(it->second);
- return alteredReg;
-}
-
-void IMLInstruction::RewriteGPR(const std::unordered_map& translationTable)
-{
- if (type == PPCREC_IML_TYPE_R_NAME)
- {
- op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_NAME_R)
- {
- op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_R_R)
- {
- op_r_r.regR = replaceRegisterIdMultiple(op_r_r.regR, translationTable);
- op_r_r.regA = replaceRegisterIdMultiple(op_r_r.regA, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_R_S32)
- {
- op_r_immS32.regR = replaceRegisterIdMultiple(op_r_immS32.regR, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_R_R_S32)
- {
- op_r_r_s32.regR = replaceRegisterIdMultiple(op_r_r_s32.regR, translationTable);
- op_r_r_s32.regA = replaceRegisterIdMultiple(op_r_r_s32.regA, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY)
- {
- op_r_r_s32_carry.regR = replaceRegisterIdMultiple(op_r_r_s32_carry.regR, translationTable);
- op_r_r_s32_carry.regA = replaceRegisterIdMultiple(op_r_r_s32_carry.regA, translationTable);
- op_r_r_s32_carry.regCarry = replaceRegisterIdMultiple(op_r_r_s32_carry.regCarry, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_R_R_R)
- {
- op_r_r_r.regR = replaceRegisterIdMultiple(op_r_r_r.regR, translationTable);
- op_r_r_r.regA = replaceRegisterIdMultiple(op_r_r_r.regA, translationTable);
- op_r_r_r.regB = replaceRegisterIdMultiple(op_r_r_r.regB, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_R_R_R_CARRY)
- {
- op_r_r_r_carry.regR = replaceRegisterIdMultiple(op_r_r_r_carry.regR, translationTable);
- op_r_r_r_carry.regA = replaceRegisterIdMultiple(op_r_r_r_carry.regA, translationTable);
- op_r_r_r_carry.regB = replaceRegisterIdMultiple(op_r_r_r_carry.regB, translationTable);
- op_r_r_r_carry.regCarry = replaceRegisterIdMultiple(op_r_r_r_carry.regCarry, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_COMPARE)
- {
- op_compare.regR = replaceRegisterIdMultiple(op_compare.regR, translationTable);
- op_compare.regA = replaceRegisterIdMultiple(op_compare.regA, translationTable);
- op_compare.regB = replaceRegisterIdMultiple(op_compare.regB, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_COMPARE_S32)
- {
- op_compare_s32.regR = replaceRegisterIdMultiple(op_compare_s32.regR, translationTable);
- op_compare_s32.regA = replaceRegisterIdMultiple(op_compare_s32.regA, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
- {
- op_conditional_jump.registerBool = replaceRegisterIdMultiple(op_conditional_jump.registerBool, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP)
- {
- // no effect on registers
- }
- else if (type == PPCREC_IML_TYPE_NO_OP)
- {
- // no effect on registers
- }
- else if (type == PPCREC_IML_TYPE_MACRO)
- {
- if (operation == PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_HLE || operation == PPCREC_IML_MACRO_COUNT_CYCLES)
- {
- // no effect on registers
- }
- else if (operation == PPCREC_IML_MACRO_B_TO_REG)
- {
- op_macro.paramReg = replaceRegisterIdMultiple(op_macro.paramReg, translationTable);
- }
- else
- {
- cemu_assert_unimplemented();
- }
- }
- else if (type == PPCREC_IML_TYPE_LOAD)
- {
- op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
- if (op_storeLoad.registerMem.IsValid())
- {
- op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
- }
- }
- else if (type == PPCREC_IML_TYPE_LOAD_INDEXED)
- {
- op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
- if (op_storeLoad.registerMem.IsValid())
- op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
- if (op_storeLoad.registerMem2.IsValid())
- op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_STORE)
- {
- op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
- if (op_storeLoad.registerMem.IsValid())
- op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_STORE_INDEXED)
- {
- op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
- if (op_storeLoad.registerMem.IsValid())
- op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
- if (op_storeLoad.registerMem2.IsValid())
- op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
- {
- op_atomic_compare_store.regEA = replaceRegisterIdMultiple(op_atomic_compare_store.regEA, translationTable);
- op_atomic_compare_store.regCompareValue = replaceRegisterIdMultiple(op_atomic_compare_store.regCompareValue, translationTable);
- op_atomic_compare_store.regWriteValue = replaceRegisterIdMultiple(op_atomic_compare_store.regWriteValue, translationTable);
- op_atomic_compare_store.regBoolOut = replaceRegisterIdMultiple(op_atomic_compare_store.regBoolOut, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_CALL_IMM)
- {
- op_call_imm.regReturn = replaceRegisterIdMultiple(op_call_imm.regReturn, translationTable);
- if (op_call_imm.regParam0.IsValid())
- op_call_imm.regParam0 = replaceRegisterIdMultiple(op_call_imm.regParam0, translationTable);
- if (op_call_imm.regParam1.IsValid())
- op_call_imm.regParam1 = replaceRegisterIdMultiple(op_call_imm.regParam1, translationTable);
- if (op_call_imm.regParam2.IsValid())
- op_call_imm.regParam2 = replaceRegisterIdMultiple(op_call_imm.regParam2, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_FPR_LOAD)
- {
- op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
- op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
- {
- op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
- op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
- op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_FPR_STORE)
- {
- op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
- op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
- {
- op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
- op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
- op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_FPR_R)
- {
- op_fpr_r.regR = replaceRegisterIdMultiple(op_fpr_r.regR, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_FPR_R_R)
- {
- op_fpr_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r.regR, translationTable);
- op_fpr_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r.regA, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_FPR_R_R_R)
- {
- op_fpr_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r.regR, translationTable);
- op_fpr_r_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r_r.regA, translationTable);
- op_fpr_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r.regB, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R)
- {
- op_fpr_r_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regR, translationTable);
- op_fpr_r_r_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regA, translationTable);
- op_fpr_r_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regB, translationTable);
- op_fpr_r_r_r_r.regC = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regC, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_FPR_COMPARE)
- {
- op_fpr_compare.regA = replaceRegisterIdMultiple(op_fpr_compare.regA, translationTable);
- op_fpr_compare.regB = replaceRegisterIdMultiple(op_fpr_compare.regB, translationTable);
- op_fpr_compare.regR = replaceRegisterIdMultiple(op_fpr_compare.regR, translationTable);
- }
- else if (type == PPCREC_IML_TYPE_X86_EFLAGS_JCC)
- {
- // no registers read or written (except for the implicit eflags)
- }
- else
- {
- cemu_assert_unimplemented();
- }
-}
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h
deleted file mode 100644
index 4df2a666..00000000
--- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h
+++ /dev/null
@@ -1,826 +0,0 @@
-#pragma once
-
-using IMLRegID = uint16; // 16 bit ID
-using IMLPhysReg = sint32; // arbitrary value that is up to the architecture backend, usually this will be the register index. A value of -1 is reserved and means not assigned
-
-// format of IMLReg:
-// 0-15 (16 bit) IMLRegID
-// 19-23 (5 bit) Offset In elements, for SIMD registers
-// 24-27 (4 bit) IMLRegFormat RegFormat
-// 28-31 (4 bit) IMLRegFormat BaseFormat
-
-enum class IMLRegFormat : uint8
-{
- INVALID_FORMAT,
- I64,
- I32,
- I16,
- I8,
- // I1 ?
- F64,
- F32,
- TYPE_COUNT,
-};
-
-class IMLReg
-{
-public:
- IMLReg()
- {
- m_raw = 0; // 0 is invalid
- }
-
- IMLReg(IMLRegFormat baseRegFormat, IMLRegFormat regFormat, uint8 viewOffset, IMLRegID regId)
- {
- m_raw = 0;
- m_raw |= ((uint8)baseRegFormat << 28);
- m_raw |= ((uint8)regFormat << 24);
- m_raw |= (uint32)regId;
- }
-
- IMLReg(IMLReg&& baseReg, IMLRegFormat viewFormat, uint8 viewOffset, IMLRegID regId)
- {
- DEBUG_BREAK;
- //m_raw = 0;
- //m_raw |= ((uint8)baseRegFormat << 28);
- //m_raw |= ((uint8)viewFormat << 24);
- //m_raw |= (uint32)regId;
- }
-
- IMLReg(const IMLReg& other) : m_raw(other.m_raw) {}
-
- IMLRegFormat GetBaseFormat() const
- {
- return (IMLRegFormat)((m_raw >> 28) & 0xF);
- }
-
- IMLRegFormat GetRegFormat() const
- {
- return (IMLRegFormat)((m_raw >> 24) & 0xF);
- }
-
- IMLRegID GetRegID() const
- {
- cemu_assert_debug(GetBaseFormat() != IMLRegFormat::INVALID_FORMAT);
- cemu_assert_debug(GetRegFormat() != IMLRegFormat::INVALID_FORMAT);
- return (IMLRegID)(m_raw & 0xFFFF);
- }
-
- void SetRegID(IMLRegID regId)
- {
- cemu_assert_debug(regId <= 0xFFFF);
- m_raw &= ~0xFFFF;
- m_raw |= (uint32)regId;
- }
-
- bool IsInvalid() const
- {
- return GetBaseFormat() == IMLRegFormat::INVALID_FORMAT;
- }
-
- bool IsValid() const
- {
- return GetBaseFormat() != IMLRegFormat::INVALID_FORMAT;
- }
-
- bool IsValidAndSameRegID(IMLRegID regId) const
- {
- return IsValid() && GetRegID() == regId;
- }
-
- // compare all fields
- bool operator==(const IMLReg& other) const
- {
- return m_raw == other.m_raw;
- }
-
-private:
- uint32 m_raw;
-};
-
-static const IMLReg IMLREG_INVALID(IMLRegFormat::INVALID_FORMAT, IMLRegFormat::INVALID_FORMAT, 0, 0);
-static const IMLRegID IMLRegID_INVALID(0xFFFF);
-
-using IMLName = uint32;
-
-enum
-{
- PPCREC_IML_OP_ASSIGN, // '=' operator
- PPCREC_IML_OP_ENDIAN_SWAP, // '=' operator with 32bit endian swap
- PPCREC_IML_OP_MULTIPLY_SIGNED, // '*' operator (signed multiply)
- PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, // unsigned 64bit multiply, store only high 32bit-word of result
- PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, // signed 64bit multiply, store only high 32bit-word of result
- PPCREC_IML_OP_DIVIDE_SIGNED, // '/' operator (signed divide)
- PPCREC_IML_OP_DIVIDE_UNSIGNED, // '/' operator (unsigned divide)
-
- // binary operation
- PPCREC_IML_OP_OR, // '|' operator
- PPCREC_IML_OP_AND, // '&' operator
- PPCREC_IML_OP_XOR, // '^' operator
- PPCREC_IML_OP_LEFT_ROTATE, // left rotate operator
- PPCREC_IML_OP_LEFT_SHIFT, // shift left operator
- PPCREC_IML_OP_RIGHT_SHIFT_U, // right shift operator (unsigned)
- PPCREC_IML_OP_RIGHT_SHIFT_S, // right shift operator (signed)
- // ppc
- PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits)
- PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits)
- PPCREC_IML_OP_CNTLZW,
- // FPU
- PPCREC_IML_OP_FPR_ASSIGN,
- PPCREC_IML_OP_FPR_LOAD_ONE, // load constant 1.0 into register
- PPCREC_IML_OP_FPR_ADD,
- PPCREC_IML_OP_FPR_SUB,
- PPCREC_IML_OP_FPR_MULTIPLY,
- PPCREC_IML_OP_FPR_DIVIDE,
- PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, // expand f32 to f64 in-place
- PPCREC_IML_OP_FPR_NEGATE,
- PPCREC_IML_OP_FPR_ABS, // abs(fpr)
- PPCREC_IML_OP_FPR_NEGATIVE_ABS, // -abs(fpr)
- PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, // round 64bit double to 64bit double with 32bit float precision (in bottom half of xmm register)
- PPCREC_IML_OP_FPR_FCTIWZ,
- PPCREC_IML_OP_FPR_SELECT, // selectively copy bottom value from operand B or C based on value in operand A
- // Conversion (FPR_R_R)
- PPCREC_IML_OP_FPR_INT_TO_FLOAT, // convert integer value in gpr to floating point value in fpr
- PPCREC_IML_OP_FPR_FLOAT_TO_INT, // convert floating point value in fpr to integer value in gpr
-
- // Bitcast (FPR_R_R)
- PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT,
-
- // R_R_R + R_R_S32
- PPCREC_IML_OP_ADD, // also R_R_R_CARRY
- PPCREC_IML_OP_SUB,
-
- // R_R only
- PPCREC_IML_OP_NOT,
- PPCREC_IML_OP_NEG,
- PPCREC_IML_OP_ASSIGN_S16_TO_S32,
- PPCREC_IML_OP_ASSIGN_S8_TO_S32,
-
- // R_R_R_carry
- PPCREC_IML_OP_ADD_WITH_CARRY, // similar to ADD but also adds carry bit (0 or 1)
-
- // X86 extension
- PPCREC_IML_OP_X86_CMP, // R_R and R_S32
-
- PPCREC_IML_OP_INVALID
-};
-
-#define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN)
-
-enum
-{
- PPCREC_IML_MACRO_B_TO_REG, // branch to PPC address in register (used for BCCTR, BCLR)
-
- PPCREC_IML_MACRO_BL, // call to different function (can be within same function)
- PPCREC_IML_MACRO_B_FAR, // branch to different function
- PPCREC_IML_MACRO_COUNT_CYCLES, // decrease current remaining thread cycles by a certain amount
- PPCREC_IML_MACRO_HLE, // HLE function call
- PPCREC_IML_MACRO_LEAVE, // leaves recompiler and switches to interpeter
- // debugging
- PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak
-};
-
-enum class IMLCondition : uint8
-{
- EQ,
- NEQ,
- SIGNED_GT,
- SIGNED_LT,
- UNSIGNED_GT,
- UNSIGNED_LT,
-
- // floating point conditions
- UNORDERED_GT, // a > b, false if either is NaN
- UNORDERED_LT, // a < b, false if either is NaN
- UNORDERED_EQ, // a == b, false if either is NaN
- UNORDERED_U, // unordered (true if either operand is NaN)
-
- ORDERED_GT,
- ORDERED_LT,
- ORDERED_EQ,
- ORDERED_U
-};
-
-enum
-{
- PPCREC_IML_TYPE_NONE,
- PPCREC_IML_TYPE_NO_OP, // no-op instruction
- PPCREC_IML_TYPE_R_R, // r* = (op) *r (can also be r* (op) *r)
- PPCREC_IML_TYPE_R_R_R, // r* = r* (op) r*
- PPCREC_IML_TYPE_R_R_R_CARRY, // r* = r* (op) r* (reads and/or updates carry)
- PPCREC_IML_TYPE_R_R_S32, // r* = r* (op) s32*
- PPCREC_IML_TYPE_R_R_S32_CARRY, // r* = r* (op) s32* (reads and/or updates carry)
- PPCREC_IML_TYPE_LOAD, // r* = [r*+s32*]
- PPCREC_IML_TYPE_LOAD_INDEXED, // r* = [r*+r*]
- PPCREC_IML_TYPE_STORE, // [r*+s32*] = r*
- PPCREC_IML_TYPE_STORE_INDEXED, // [r*+r*] = r*
- PPCREC_IML_TYPE_R_NAME, // r* = name
- PPCREC_IML_TYPE_NAME_R, // name* = r*
- PPCREC_IML_TYPE_R_S32, // r* (op) imm
- PPCREC_IML_TYPE_MACRO,
- PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles < 0
-
- // conditions and branches
- PPCREC_IML_TYPE_COMPARE, // r* = r* CMP[cond] r*
- PPCREC_IML_TYPE_COMPARE_S32, // r* = r* CMP[cond] imm
- PPCREC_IML_TYPE_JUMP, // jump always
- PPCREC_IML_TYPE_CONDITIONAL_JUMP, // jump conditionally based on boolean value in register
-
- // atomic
- PPCREC_IML_TYPE_ATOMIC_CMP_STORE,
-
- // function call
- PPCREC_IML_TYPE_CALL_IMM, // call to fixed immediate address
-
- // FPR
- PPCREC_IML_TYPE_FPR_LOAD, // r* = (bitdepth) [r*+s32*] (single or paired single mode)
- PPCREC_IML_TYPE_FPR_LOAD_INDEXED, // r* = (bitdepth) [r*+r*] (single or paired single mode)
- PPCREC_IML_TYPE_FPR_STORE, // (bitdepth) [r*+s32*] = r* (single or paired single mode)
- PPCREC_IML_TYPE_FPR_STORE_INDEXED, // (bitdepth) [r*+r*] = r* (single or paired single mode)
- PPCREC_IML_TYPE_FPR_R_R,
- PPCREC_IML_TYPE_FPR_R_R_R,
- PPCREC_IML_TYPE_FPR_R_R_R_R,
- PPCREC_IML_TYPE_FPR_R,
-
- PPCREC_IML_TYPE_FPR_COMPARE, // r* = r* CMP[cond] r*
-
- // X86 specific
- PPCREC_IML_TYPE_X86_EFLAGS_JCC,
-};
-
-enum // IMLName
-{
- PPCREC_NAME_NONE,
- PPCREC_NAME_TEMPORARY = 1000,
- PPCREC_NAME_R0 = 2000,
- PPCREC_NAME_SPR0 = 3000,
- PPCREC_NAME_FPR_HALF = 4800, // Counts PS0 and PS1 separately. E.g. fp3.ps1 is at offset 3 * 2 + 1
- PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7
- PPCREC_NAME_XER_CA = 6000, // carry bit from XER
- PPCREC_NAME_XER_OV = 6001, // overflow bit from XER
- PPCREC_NAME_XER_SO = 6002, // summary overflow bit from XER
- PPCREC_NAME_CR = 7000, // CR register bits (31 to 0)
- PPCREC_NAME_CR_LAST = PPCREC_NAME_CR+31,
- PPCREC_NAME_CPU_MEMRES_EA = 8000,
- PPCREC_NAME_CPU_MEMRES_VAL = 8001
-};
-
-#define PPC_REC_INVALID_REGISTER 0xFF // deprecated. Use IMLREG_INVALID instead
-
-enum
-{
- // fpr load
- PPCREC_FPR_LD_MODE_SINGLE,
- PPCREC_FPR_LD_MODE_DOUBLE,
-
- // fpr store
- PPCREC_FPR_ST_MODE_SINGLE,
- PPCREC_FPR_ST_MODE_DOUBLE,
-
- PPCREC_FPR_ST_MODE_UI32_FROM_PS0, // store raw low-32bit of PS0
-};
-
-struct IMLUsedRegisters
-{
- IMLUsedRegisters() {};
-
- bool IsWrittenByRegId(IMLRegID regId) const
- {
- if (writtenGPR1.IsValid() && writtenGPR1.GetRegID() == regId)
- return true;
- if (writtenGPR2.IsValid() && writtenGPR2.GetRegID() == regId)
- return true;
- return false;
- }
-
- bool IsBaseGPRWritten(IMLReg imlReg) const
- {
- cemu_assert_debug(imlReg.IsValid());
- auto regId = imlReg.GetRegID();
- return IsWrittenByRegId(regId);
- }
-
- template
- void ForEachWrittenGPR(Fn F) const
- {
- if (writtenGPR1.IsValid())
- F(writtenGPR1);
- if (writtenGPR2.IsValid())
- F(writtenGPR2);
- }
-
- template
- void ForEachReadGPR(Fn F) const
- {
- if (readGPR1.IsValid())
- F(readGPR1);
- if (readGPR2.IsValid())
- F(readGPR2);
- if (readGPR3.IsValid())
- F(readGPR3);
- if (readGPR4.IsValid())
- F(readGPR4);
- }
-
- template
- void ForEachAccessedGPR(Fn F) const
- {
- // GPRs
- if (readGPR1.IsValid())
- F(readGPR1, false);
- if (readGPR2.IsValid())
- F(readGPR2, false);
- if (readGPR3.IsValid())
- F(readGPR3, false);
- if (readGPR4.IsValid())
- F(readGPR4, false);
- if (writtenGPR1.IsValid())
- F(writtenGPR1, true);
- if (writtenGPR2.IsValid())
- F(writtenGPR2, true);
- }
-
- IMLReg readGPR1;
- IMLReg readGPR2;
- IMLReg readGPR3;
- IMLReg readGPR4;
- IMLReg writtenGPR1;
- IMLReg writtenGPR2;
-};
-
-struct IMLInstruction
-{
- IMLInstruction() {}
- IMLInstruction(const IMLInstruction& other)
- {
- memcpy(this, &other, sizeof(IMLInstruction));
- }
-
- uint8 type;
- uint8 operation;
- union
- {
- struct
- {
- uint8 _padding[7];
- }padding;
- struct
- {
- IMLReg regR;
- IMLReg regA;
- }op_r_r;
- struct
- {
- IMLReg regR;
- IMLReg regA;
- IMLReg regB;
- }op_r_r_r;
- struct
- {
- IMLReg regR;
- IMLReg regA;
- IMLReg regB;
- IMLReg regCarry;
- }op_r_r_r_carry;
- struct
- {
- IMLReg regR;
- IMLReg regA;
- sint32 immS32;
- }op_r_r_s32;
- struct
- {
- IMLReg regR;
- IMLReg regA;
- IMLReg regCarry;
- sint32 immS32;
- }op_r_r_s32_carry;
- struct
- {
- IMLReg regR;
- IMLName name;
- }op_r_name; // alias op_name_r
- struct
- {
- IMLReg regR;
- sint32 immS32;
- }op_r_immS32;
- struct
- {
- uint32 param;
- uint32 param2;
- uint16 paramU16;
- IMLReg paramReg;
- }op_macro;
- struct
- {
- IMLReg registerData;
- IMLReg registerMem;
- IMLReg registerMem2;
- uint8 copyWidth;
- struct
- {
- bool swapEndian : 1;
- bool signExtend : 1;
- bool notExpanded : 1; // for floats
- }flags2;
- uint8 mode; // transfer mode
- sint32 immS32;
- }op_storeLoad;
- struct
- {
- uintptr_t callAddress;
- IMLReg regParam0;
- IMLReg regParam1;
- IMLReg regParam2;
- IMLReg regReturn;
- }op_call_imm;
- struct
- {
- IMLReg regR;
- IMLReg regA;
- }op_fpr_r_r;
- struct
- {
- IMLReg regR;
- IMLReg regA;
- IMLReg regB;
- }op_fpr_r_r_r;
- struct
- {
- IMLReg regR;
- IMLReg regA;
- IMLReg regB;
- IMLReg regC;
- }op_fpr_r_r_r_r;
- struct
- {
- IMLReg regR;
- }op_fpr_r;
- struct
- {
- IMLReg regR; // stores the boolean result of the comparison
- IMLReg regA;
- IMLReg regB;
- IMLCondition cond;
- }op_fpr_compare;
- struct
- {
- IMLReg regR; // stores the boolean result of the comparison
- IMLReg regA;
- IMLReg regB;
- IMLCondition cond;
- }op_compare;
- struct
- {
- IMLReg regR; // stores the boolean result of the comparison
- IMLReg regA;
- sint32 immS32;
- IMLCondition cond;
- }op_compare_s32;
- struct
- {
- IMLReg registerBool;
- bool mustBeTrue;
- }op_conditional_jump;
- struct
- {
- IMLReg regEA;
- IMLReg regCompareValue;
- IMLReg regWriteValue;
- IMLReg regBoolOut;
- }op_atomic_compare_store;
- // conditional operations (emitted if supported by target platform)
- struct
- {
- // r_s32
- IMLReg regR;
- sint32 immS32;
- // condition
- uint8 crRegisterIndex;
- uint8 crBitIndex;
- bool bitMustBeSet;
- }op_conditional_r_s32;
- // X86 specific
- struct
- {
- IMLCondition cond;
- bool invertedCondition;
- }op_x86_eflags_jcc;
- };
-
- bool IsSuffixInstruction() const
- {
- if (type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_BL ||
- type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_B_FAR ||
- type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_B_TO_REG ||
- type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_LEAVE ||
- type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_HLE ||
- type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ||
- type == PPCREC_IML_TYPE_JUMP ||
- type == PPCREC_IML_TYPE_CONDITIONAL_JUMP ||
- type == PPCREC_IML_TYPE_X86_EFLAGS_JCC)
- return true;
- return false;
- }
-
- // instruction setters
- void make_no_op()
- {
- type = PPCREC_IML_TYPE_NO_OP;
- operation = 0;
- }
-
- void make_r_name(IMLReg regR, IMLName name)
- {
- cemu_assert_debug(regR.GetBaseFormat() == regR.GetRegFormat()); // for name load/store instructions the register must match the base format
- type = PPCREC_IML_TYPE_R_NAME;
- operation = PPCREC_IML_OP_ASSIGN;
- op_r_name.regR = regR;
- op_r_name.name = name;
- }
-
- void make_name_r(IMLName name, IMLReg regR)
- {
- cemu_assert_debug(regR.GetBaseFormat() == regR.GetRegFormat()); // for name load/store instructions the register must match the base format
- type = PPCREC_IML_TYPE_NAME_R;
- operation = PPCREC_IML_OP_ASSIGN;
- op_r_name.regR = regR;
- op_r_name.name = name;
- }
-
- void make_debugbreak(uint32 currentPPCAddress = 0)
- {
- make_macro(PPCREC_IML_MACRO_DEBUGBREAK, 0, currentPPCAddress, 0, IMLREG_INVALID);
- }
-
- void make_macro(uint32 macroId, uint32 param, uint32 param2, uint16 paramU16, IMLReg regParam)
- {
- this->type = PPCREC_IML_TYPE_MACRO;
- this->operation = macroId;
- this->op_macro.param = param;
- this->op_macro.param2 = param2;
- this->op_macro.paramU16 = paramU16;
- this->op_macro.paramReg = regParam;
- }
-
- void make_cjump_cycle_check()
- {
- this->type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK;
- this->operation = 0;
- }
-
- void make_r_r(uint32 operation, IMLReg regR, IMLReg regA)
- {
- this->type = PPCREC_IML_TYPE_R_R;
- this->operation = operation;
- this->op_r_r.regR = regR;
- this->op_r_r.regA = regA;
- }
-
- void make_r_s32(uint32 operation, IMLReg regR, sint32 immS32)
- {
- this->type = PPCREC_IML_TYPE_R_S32;
- this->operation = operation;
- this->op_r_immS32.regR = regR;
- this->op_r_immS32.immS32 = immS32;
- }
-
- void make_r_r_r(uint32 operation, IMLReg regR, IMLReg regA, IMLReg regB)
- {
- this->type = PPCREC_IML_TYPE_R_R_R;
- this->operation = operation;
- this->op_r_r_r.regR = regR;
- this->op_r_r_r.regA = regA;
- this->op_r_r_r.regB = regB;
- }
-
- void make_r_r_r_carry(uint32 operation, IMLReg regR, IMLReg regA, IMLReg regB, IMLReg regCarry)
- {
- this->type = PPCREC_IML_TYPE_R_R_R_CARRY;
- this->operation = operation;
- this->op_r_r_r_carry.regR = regR;
- this->op_r_r_r_carry.regA = regA;
- this->op_r_r_r_carry.regB = regB;
- this->op_r_r_r_carry.regCarry = regCarry;
- }
-
- void make_r_r_s32(uint32 operation, IMLReg regR, IMLReg regA, sint32 immS32)
- {
- this->type = PPCREC_IML_TYPE_R_R_S32;
- this->operation = operation;
- this->op_r_r_s32.regR = regR;
- this->op_r_r_s32.regA = regA;
- this->op_r_r_s32.immS32 = immS32;
- }
-
- void make_r_r_s32_carry(uint32 operation, IMLReg regR, IMLReg regA, sint32 immS32, IMLReg regCarry)
- {
- this->type = PPCREC_IML_TYPE_R_R_S32_CARRY;
- this->operation = operation;
- this->op_r_r_s32_carry.regR = regR;
- this->op_r_r_s32_carry.regA = regA;
- this->op_r_r_s32_carry.immS32 = immS32;
- this->op_r_r_s32_carry.regCarry = regCarry;
- }
-
- void make_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond)
- {
- this->type = PPCREC_IML_TYPE_COMPARE;
- this->operation = PPCREC_IML_OP_INVALID;
- this->op_compare.regR = regR;
- this->op_compare.regA = regA;
- this->op_compare.regB = regB;
- this->op_compare.cond = cond;
- }
-
- void make_compare_s32(IMLReg regA, sint32 immS32, IMLReg regR, IMLCondition cond)
- {
- this->type = PPCREC_IML_TYPE_COMPARE_S32;
- this->operation = PPCREC_IML_OP_INVALID;
- this->op_compare_s32.regR = regR;
- this->op_compare_s32.regA = regA;
- this->op_compare_s32.immS32 = immS32;
- this->op_compare_s32.cond = cond;
- }
-
- void make_conditional_jump(IMLReg regBool, bool mustBeTrue)
- {
- this->type = PPCREC_IML_TYPE_CONDITIONAL_JUMP;
- this->operation = PPCREC_IML_OP_INVALID;
- this->op_conditional_jump.registerBool = regBool;
- this->op_conditional_jump.mustBeTrue = mustBeTrue;
- }
-
- void make_jump()
- {
- this->type = PPCREC_IML_TYPE_JUMP;
- this->operation = PPCREC_IML_OP_INVALID;
- }
-
- // load from memory
- void make_r_memory(IMLReg regD, IMLReg regMem, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian)
- {
- this->type = PPCREC_IML_TYPE_LOAD;
- this->operation = 0;
- this->op_storeLoad.registerData = regD;
- this->op_storeLoad.registerMem = regMem;
- this->op_storeLoad.immS32 = immS32;
- this->op_storeLoad.copyWidth = copyWidth;
- this->op_storeLoad.flags2.swapEndian = switchEndian;
- this->op_storeLoad.flags2.signExtend = signExtend;
- }
-
- // store to memory
- void make_memory_r(IMLReg regS, IMLReg regMem, sint32 immS32, uint32 copyWidth, bool switchEndian)
- {
- this->type = PPCREC_IML_TYPE_STORE;
- this->operation = 0;
- this->op_storeLoad.registerData = regS;
- this->op_storeLoad.registerMem = regMem;
- this->op_storeLoad.immS32 = immS32;
- this->op_storeLoad.copyWidth = copyWidth;
- this->op_storeLoad.flags2.swapEndian = switchEndian;
- this->op_storeLoad.flags2.signExtend = false;
- }
-
- void make_atomic_cmp_store(IMLReg regEA, IMLReg regCompareValue, IMLReg regWriteValue, IMLReg regSuccessOutput)
- {
- this->type = PPCREC_IML_TYPE_ATOMIC_CMP_STORE;
- this->operation = 0;
- this->op_atomic_compare_store.regEA = regEA;
- this->op_atomic_compare_store.regCompareValue = regCompareValue;
- this->op_atomic_compare_store.regWriteValue = regWriteValue;
- this->op_atomic_compare_store.regBoolOut = regSuccessOutput;
- }
-
- void make_call_imm(uintptr_t callAddress, IMLReg param0, IMLReg param1, IMLReg param2, IMLReg regReturn)
- {
- this->type = PPCREC_IML_TYPE_CALL_IMM;
- this->operation = 0;
- this->op_call_imm.callAddress = callAddress;
- this->op_call_imm.regParam0 = param0;
- this->op_call_imm.regParam1 = param1;
- this->op_call_imm.regParam2 = param2;
- this->op_call_imm.regReturn = regReturn;
- }
-
- // FPR
-
- // load from memory
- void make_fpr_r_memory(IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian)
- {
- this->type = PPCREC_IML_TYPE_FPR_LOAD;
- this->operation = 0;
- this->op_storeLoad.registerData = registerDestination;
- this->op_storeLoad.registerMem = registerMemory;
- this->op_storeLoad.immS32 = immS32;
- this->op_storeLoad.mode = mode;
- this->op_storeLoad.flags2.swapEndian = switchEndian;
- }
-
- void make_fpr_r_memory_indexed(IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 mode, bool switchEndian)
- {
- this->type = PPCREC_IML_TYPE_FPR_LOAD_INDEXED;
- this->operation = 0;
- this->op_storeLoad.registerData = registerDestination;
- this->op_storeLoad.registerMem = registerMemory1;
- this->op_storeLoad.registerMem2 = registerMemory2;
- this->op_storeLoad.immS32 = 0;
- this->op_storeLoad.mode = mode;
- this->op_storeLoad.flags2.swapEndian = switchEndian;
- }
-
- // store to memory
- void make_fpr_memory_r(IMLReg registerSource, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian)
- {
- this->type = PPCREC_IML_TYPE_FPR_STORE;
- this->operation = 0;
- this->op_storeLoad.registerData = registerSource;
- this->op_storeLoad.registerMem = registerMemory;
- this->op_storeLoad.immS32 = immS32;
- this->op_storeLoad.mode = mode;
- this->op_storeLoad.flags2.swapEndian = switchEndian;
- }
-
- void make_fpr_memory_r_indexed(IMLReg registerSource, IMLReg registerMemory1, IMLReg registerMemory2, sint32 immS32, uint32 mode, bool switchEndian)
- {
- this->type = PPCREC_IML_TYPE_FPR_STORE_INDEXED;
- this->operation = 0;
- this->op_storeLoad.registerData = registerSource;
- this->op_storeLoad.registerMem = registerMemory1;
- this->op_storeLoad.registerMem2 = registerMemory2;
- this->op_storeLoad.immS32 = immS32;
- this->op_storeLoad.mode = mode;
- this->op_storeLoad.flags2.swapEndian = switchEndian;
- }
-
- void make_fpr_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond)
- {
- this->type = PPCREC_IML_TYPE_FPR_COMPARE;
- this->operation = -999;
- this->op_fpr_compare.regR = regR;
- this->op_fpr_compare.regA = regA;
- this->op_fpr_compare.regB = regB;
- this->op_fpr_compare.cond = cond;
- }
-
- void make_fpr_r(sint32 operation, IMLReg registerResult)
- {
- // OP (fpr)
- this->type = PPCREC_IML_TYPE_FPR_R;
- this->operation = operation;
- this->op_fpr_r.regR = registerResult;
- }
-
- void make_fpr_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperand, sint32 crRegister=PPC_REC_INVALID_REGISTER)
- {
- // fpr OP fpr
- this->type = PPCREC_IML_TYPE_FPR_R_R;
- this->operation = operation;
- this->op_fpr_r_r.regR = registerResult;
- this->op_fpr_r_r.regA = registerOperand;
- }
-
- void make_fpr_r_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperand1, IMLReg registerOperand2, sint32 crRegister=PPC_REC_INVALID_REGISTER)
- {
- // fpr = OP (fpr,fpr)
- this->type = PPCREC_IML_TYPE_FPR_R_R_R;
- this->operation = operation;
- this->op_fpr_r_r_r.regR = registerResult;
- this->op_fpr_r_r_r.regA = registerOperand1;
- this->op_fpr_r_r_r.regB = registerOperand2;
- }
-
- void make_fpr_r_r_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperandA, IMLReg registerOperandB, IMLReg registerOperandC, sint32 crRegister=PPC_REC_INVALID_REGISTER)
- {
- // fpr = OP (fpr,fpr,fpr)
- this->type = PPCREC_IML_TYPE_FPR_R_R_R_R;
- this->operation = operation;
- this->op_fpr_r_r_r_r.regR = registerResult;
- this->op_fpr_r_r_r_r.regA = registerOperandA;
- this->op_fpr_r_r_r_r.regB = registerOperandB;
- this->op_fpr_r_r_r_r.regC = registerOperandC;
- }
-
- /* X86 specific */
- void make_x86_eflags_jcc(IMLCondition cond, bool invertedCondition)
- {
- this->type = PPCREC_IML_TYPE_X86_EFLAGS_JCC;
- this->operation = -999;
- this->op_x86_eflags_jcc.cond = cond;
- this->op_x86_eflags_jcc.invertedCondition = invertedCondition;
- }
-
- void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const;
- bool HasSideEffects() const; // returns true if the instruction has side effects beyond just reading and writing registers. Dead code elimination uses this to know if an instruction can be dropped when the regular register outputs are not used
-
- void RewriteGPR(const std::unordered_map& translationTable);
-};
-
-// architecture specific constants
-namespace IMLArchX86
-{
- static constexpr int PHYSREG_GPR_BASE = 0;
- static constexpr int PHYSREG_FPR_BASE = 16;
-};
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp
deleted file mode 100644
index 7671a163..00000000
--- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp
+++ /dev/null
@@ -1,719 +0,0 @@
-#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
-#include "Cafe/HW/Espresso/Recompiler/IML/IML.h"
-#include "Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h"
-
-#include "../PPCRecompiler.h"
-#include "../PPCRecompilerIml.h"
-#include "../BackendX64/BackendX64.h"
-
-#include "Common/FileStream.h"
-
-#include
-#include
-
-IMLReg _FPRRegFromID(IMLRegID regId)
-{
- return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, regId);
-}
-
-void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg fprReg)
-{
- IMLRegID fprIndex = fprReg.GetRegID();
-
- IMLInstruction* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad;
- if (imlInstructionLoad->op_storeLoad.flags2.notExpanded)
- return;
- boost::container::static_vector trackedMoves; // only track up to 4 copies
- IMLUsedRegisters registersUsed;
- sint32 scanRangeEnd = std::min(imlIndexLoad + 25, imlSegment->imlList.size()); // don't scan too far (saves performance and also the chances we can merge the load+store become low at high distances)
- bool foundMatch = false;
- sint32 lastStore = -1;
- for (sint32 i = imlIndexLoad + 1; i < scanRangeEnd; i++)
- {
- IMLInstruction* imlInstruction = imlSegment->imlList.data() + i;
- if (imlInstruction->IsSuffixInstruction())
- break;
- // check if FPR is stored
- if ((imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE) ||
- (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE))
- {
- if (imlInstruction->op_storeLoad.registerData.GetRegID() == fprIndex)
- {
- if (foundMatch == false)
- {
- // flag the load-single instruction as "don't expand" (leave single value as-is)
- imlInstructionLoad->op_storeLoad.flags2.notExpanded = true;
- }
- // also set the flag for the store instruction
- IMLInstruction* imlInstructionStore = imlInstruction;
- imlInstructionStore->op_storeLoad.flags2.notExpanded = true;
-
- foundMatch = true;
- lastStore = i + 1;
-
- continue;
- }
- }
- // if the FPR is copied then keep track of it. We can expand the copies instead of the original
- if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R && imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN && imlInstruction->op_fpr_r_r.regA.GetRegID() == fprIndex)
- {
- if (imlInstruction->op_fpr_r_r.regR.GetRegID() == fprIndex)
- {
- // unexpected no-op
- break;
- }
- if (trackedMoves.size() >= trackedMoves.capacity())
- {
- // we cant track any more moves, expand here
- lastStore = i;
- break;
- }
- trackedMoves.push_back(i);
- continue;
- }
- // check if FPR is overwritten
- imlInstruction->CheckRegisterUsage(&registersUsed);
- if (registersUsed.writtenGPR1.IsValidAndSameRegID(fprIndex) || registersUsed.writtenGPR2.IsValidAndSameRegID(fprIndex))
- break;
- if (registersUsed.readGPR1.IsValidAndSameRegID(fprIndex))
- break;
- if (registersUsed.readGPR2.IsValidAndSameRegID(fprIndex))
- break;
- if (registersUsed.readGPR3.IsValidAndSameRegID(fprIndex))
- break;
- if (registersUsed.readGPR4.IsValidAndSameRegID(fprIndex))
- break;
- }
-
- if (foundMatch)
- {
- // insert expand instructions for each target register of a move
- sint32 positionBias = 0;
- for (auto& trackedMove : trackedMoves)
- {
- sint32 realPosition = trackedMove + positionBias;
- IMLInstruction* imlMoveInstruction = imlSegment->imlList.data() + realPosition;
- if (realPosition >= lastStore)
- break; // expand is inserted before this move
- else
- lastStore++;
-
- cemu_assert_debug(imlMoveInstruction->type == PPCREC_IML_TYPE_FPR_R_R && imlMoveInstruction->op_fpr_r_r.regA.GetRegID() == fprIndex);
- cemu_assert_debug(imlMoveInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::F64);
- auto dstReg = imlMoveInstruction->op_fpr_r_r.regR;
- IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, realPosition+1); // one after the move
- newExpand->make_fpr_r(PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, dstReg);
- positionBias++;
- }
- // insert expand instruction after store
- IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, lastStore);
- newExpand->make_fpr_r(PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, _FPRRegFromID(fprIndex));
- }
-}
-
-/*
-* Scans for patterns:
-*
-*
-*
-* For these patterns the store and load is modified to work with un-extended values (float remains as float, no double conversion)
-* The float->double extension is then executed later
-* Advantages:
-* Keeps denormals and other special float values intact
-* Slightly improves performance
-*/
-void IMLOptimizer_OptimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext)
-{
- for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
- {
- for (sint32 i = 0; i < segIt->imlList.size(); i++)
- {
- IMLInstruction* imlInstruction = segIt->imlList.data() + i;
- if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE)
- {
- PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
- }
- else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE)
- {
- PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
- }
- }
- }
-}
-
-void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg gprReg)
-{
- cemu_assert_debug(gprReg.GetBaseFormat() == IMLRegFormat::I64); // todo - proper handling required for non-standard sizes
- cemu_assert_debug(gprReg.GetRegFormat() == IMLRegFormat::I32);
-
- IMLRegID gprIndex = gprReg.GetRegID();
- IMLInstruction* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad;
- if ( imlInstructionLoad->op_storeLoad.flags2.swapEndian == false )
- return;
- bool foundMatch = false;
- IMLUsedRegisters registersUsed;
- sint32 scanRangeEnd = std::min(imlIndexLoad + 25, imlSegment->imlList.size()); // don't scan too far (saves performance and also the chances we can merge the load+store become low at high distances)
- sint32 i = imlIndexLoad + 1;
- for (; i < scanRangeEnd; i++)
- {
- IMLInstruction* imlInstruction = imlSegment->imlList.data() + i;
- if (imlInstruction->IsSuffixInstruction())
- break;
- // check if GPR is stored
- if ((imlInstruction->type == PPCREC_IML_TYPE_STORE && imlInstruction->op_storeLoad.copyWidth == 32 ) )
- {
- if (imlInstruction->op_storeLoad.registerMem.GetRegID() == gprIndex)
- break;
- if (imlInstruction->op_storeLoad.registerData.GetRegID() == gprIndex)
- {
- IMLInstruction* imlInstructionStore = imlInstruction;
- if (foundMatch == false)
- {
- // switch the endian swap flag for the load instruction
- imlInstructionLoad->op_storeLoad.flags2.swapEndian = !imlInstructionLoad->op_storeLoad.flags2.swapEndian;
- foundMatch = true;
- }
- // switch the endian swap flag for the store instruction
- imlInstructionStore->op_storeLoad.flags2.swapEndian = !imlInstructionStore->op_storeLoad.flags2.swapEndian;
- // keep scanning
- continue;
- }
- }
- // check if GPR is accessed
- imlInstruction->CheckRegisterUsage(&registersUsed);
- if (registersUsed.readGPR1.IsValidAndSameRegID(gprIndex) ||
- registersUsed.readGPR2.IsValidAndSameRegID(gprIndex) ||
- registersUsed.readGPR3.IsValidAndSameRegID(gprIndex))
- {
- break;
- }
- if (registersUsed.IsBaseGPRWritten(gprReg))
- return; // GPR overwritten, we don't need to byte swap anymore
- }
- if (foundMatch)
- {
- PPCRecompiler_insertInstruction(imlSegment, i)->make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, gprReg, gprReg);
- }
-}
-
-/*
-* Scans for patterns:
-*
-*
-*
-* For these patterns the store and load is modified to work with non-swapped values
-* The big_endian->little_endian conversion is then executed later
-* Advantages:
-* Slightly improves performance
-*/
-void IMLOptimizer_OptimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext)
-{
- for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
- {
- for (sint32 i = 0; i < segIt->imlList.size(); i++)
- {
- IMLInstruction* imlInstruction = segIt->imlList.data() + i;
- if (imlInstruction->type == PPCREC_IML_TYPE_LOAD && imlInstruction->op_storeLoad.copyWidth == 32 && imlInstruction->op_storeLoad.flags2.swapEndian )
- {
- PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
- }
- }
- }
-}
-
-IMLName PPCRecompilerImlGen_GetRegName(ppcImlGenContext_t* ppcImlGenContext, IMLReg reg);
-
-sint32 _getGQRIndexFromRegister(ppcImlGenContext_t* ppcImlGenContext, IMLReg gqrReg)
-{
- if (gqrReg.IsInvalid())
- return -1;
- sint32 namedReg = PPCRecompilerImlGen_GetRegName(ppcImlGenContext, gqrReg);
- if (namedReg >= (PPCREC_NAME_SPR0 + SPR_UGQR0) && namedReg <= (PPCREC_NAME_SPR0 + SPR_UGQR7))
- {
- return namedReg - (PPCREC_NAME_SPR0 + SPR_UGQR0);
- }
- else
- {
- cemu_assert_suspicious();
- }
- return -1;
-}
-
-bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, uint32& gqrValue)
-{
- // the default configuration is:
- // UGQR0 = 0x00000000
- // UGQR2 = 0x00040004
- // UGQR3 = 0x00050005
- // UGQR4 = 0x00060006
- // UGQR5 = 0x00070007
- // but games are free to modify UGQR2 to UGQR7 it seems.
- // no game modifies UGQR0 so it's safe enough to optimize for the default value
- // Ideally we would do some kind of runtime tracking and second recompilation to create fast paths for PSQ_L/PSQ_ST but thats todo
- if (gqrIndex == 0)
- gqrValue = 0x00000000;
- else
- return false;
- return true;
-}
-
-// analyses register dependencies across the entire function
-// per segment this will generate information about which registers need to be preserved and which ones don't (e.g. are overwritten)
-class IMLOptimizerRegIOAnalysis
-{
- public:
- // constructor with segment pointer list as span
- IMLOptimizerRegIOAnalysis(std::span segmentList, uint32 maxRegId) : m_segmentList(segmentList), m_maxRegId(maxRegId)
- {
- m_segRegisterInOutList.resize(segmentList.size());
- }
-
- struct IMLSegmentRegisterInOut
- {
- // todo - since our register ID range is usually pretty small (<64) we could use integer bitmasks to accelerate this? There is a helper class used in RA code already
- std::unordered_set regWritten; // registers which are modified in this segment
- std::unordered_set regImported; // registers which are read in this segment before they are written (importing value from previous segments)
- std::unordered_set regForward; // registers which are not read or written in this segment, but are imported into a later segment (propagated info)
- };
-
- // calculate which registers are imported (read-before-written) and forwarded (read-before-written by a later segment) per segment
- // then in a second step propagate the dependencies across linked segments
- void ComputeDepedencies()
- {
- std::vector& segRegisterInOutList = m_segRegisterInOutList;
- IMLSegmentRegisterInOut* segIO = segRegisterInOutList.data();
- uint32 index = 0;
- for(auto& seg : m_segmentList)
- {
- seg->momentaryIndex = index;
- index++;
- for(auto& instr : seg->imlList)
- {
- IMLUsedRegisters registerUsage;
- instr.CheckRegisterUsage(&registerUsage);
- // registers are considered imported if they are read before being written in this seg
- registerUsage.ForEachReadGPR([&](IMLReg gprReg) {
- IMLRegID gprId = gprReg.GetRegID();
- if (!segIO->regWritten.contains(gprId))
- {
- segIO->regImported.insert(gprId);
- }
- });
- registerUsage.ForEachWrittenGPR([&](IMLReg gprReg) {
- IMLRegID gprId = gprReg.GetRegID();
- segIO->regWritten.insert(gprId);
- });
- }
- segIO++;
- }
- // for every exit segment, import all registers
- for(auto& seg : m_segmentList)
- {
- if (!seg->nextSegmentIsUncertain)
- continue;
- if(seg->deadCodeEliminationHintSeg)
- continue;
- IMLSegmentRegisterInOut& segIO = segRegisterInOutList[seg->momentaryIndex];
- for(uint32 i=0; i<=m_maxRegId; i++)
- {
- segIO.regImported.insert((IMLRegID)i);
- }
- }
- // broadcast dependencies across segment chains
- std::unordered_set segIdsWhichNeedUpdate;
- for (uint32 i = 0; i < m_segmentList.size(); i++)
- {
- segIdsWhichNeedUpdate.insert(i);
- }
- while(!segIdsWhichNeedUpdate.empty())
- {
- auto firstIt = segIdsWhichNeedUpdate.begin();
- uint32 segId = *firstIt;
- segIdsWhichNeedUpdate.erase(firstIt);
- // forward regImported and regForward to earlier segments into their regForward, unless the register is written
- auto& curSeg = m_segmentList[segId];
- IMLSegmentRegisterInOut& curSegIO = segRegisterInOutList[segId];
- for(auto& prevSeg : curSeg->list_prevSegments)
- {
- IMLSegmentRegisterInOut& prevSegIO = segRegisterInOutList[prevSeg->momentaryIndex];
- bool prevSegChanged = false;
- for(auto& regId : curSegIO.regImported)
- {
- if (!prevSegIO.regWritten.contains(regId))
- prevSegChanged |= prevSegIO.regForward.insert(regId).second;
- }
- for(auto& regId : curSegIO.regForward)
- {
- if (!prevSegIO.regWritten.contains(regId))
- prevSegChanged |= prevSegIO.regForward.insert(regId).second;
- }
- if(prevSegChanged)
- segIdsWhichNeedUpdate.insert(prevSeg->momentaryIndex);
- }
- // same for hint links
- for(auto& prevSeg : curSeg->list_deadCodeHintBy)
- {
- IMLSegmentRegisterInOut& prevSegIO = segRegisterInOutList[prevSeg->momentaryIndex];
- bool prevSegChanged = false;
- for(auto& regId : curSegIO.regImported)
- {
- if (!prevSegIO.regWritten.contains(regId))
- prevSegChanged |= prevSegIO.regForward.insert(regId).second;
- }
- for(auto& regId : curSegIO.regForward)
- {
- if (!prevSegIO.regWritten.contains(regId))
- prevSegChanged |= prevSegIO.regForward.insert(regId).second;
- }
- if(prevSegChanged)
- segIdsWhichNeedUpdate.insert(prevSeg->momentaryIndex);
- }
- }
- }
-
- std::unordered_set GetRegistersNeededAtEndOfSegment(IMLSegment& seg)
- {
- std::unordered_set regsNeeded;
- if(seg.nextSegmentIsUncertain)
- {
- if(seg.deadCodeEliminationHintSeg)
- {
- auto& nextSegIO = m_segRegisterInOutList[seg.deadCodeEliminationHintSeg->momentaryIndex];
- regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end());
- regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end());
- }
- else
- {
- // add all regs
- for(uint32 i = 0; i <= m_maxRegId; i++)
- regsNeeded.insert(i);
- }
- return regsNeeded;
- }
- if(seg.nextSegmentBranchTaken)
- {
- auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchTaken->momentaryIndex];
- regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end());
- regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end());
- }
- if(seg.nextSegmentBranchNotTaken)
- {
- auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchNotTaken->momentaryIndex];
- regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end());
- regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end());
- }
- return regsNeeded;
- }
-
- bool IsRegisterNeededAtEndOfSegment(IMLSegment& seg, IMLRegID regId)
- {
- if(seg.nextSegmentIsUncertain)
- {
- if(!seg.deadCodeEliminationHintSeg)
- return true;
- auto& nextSegIO = m_segRegisterInOutList[seg.deadCodeEliminationHintSeg->momentaryIndex];
- if(nextSegIO.regImported.contains(regId))
- return true;
- if(nextSegIO.regForward.contains(regId))
- return true;
- return false;
- }
- if(seg.nextSegmentBranchTaken)
- {
- auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchTaken->momentaryIndex];
- if(nextSegIO.regImported.contains(regId))
- return true;
- if(nextSegIO.regForward.contains(regId))
- return true;
- }
- if(seg.nextSegmentBranchNotTaken)
- {
- auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchNotTaken->momentaryIndex];
- if(nextSegIO.regImported.contains(regId))
- return true;
- if(nextSegIO.regForward.contains(regId))
- return true;
- }
- return false;
- }
-
- private:
- std::span m_segmentList;
- uint32 m_maxRegId;
-
- std::vector m_segRegisterInOutList;
-
-};
-
-// scan backwards starting from index and return the index of the first found instruction which writes to the given register (by id)
-sint32 IMLUtil_FindInstructionWhichWritesRegister(IMLSegment& seg, sint32 startIndex, IMLReg reg, sint32 maxScanDistance = -1)
-{
- sint32 endIndex = std::max(startIndex - maxScanDistance, 0);
- for (sint32 i = startIndex; i >= endIndex; i--)
- {
- IMLInstruction& imlInstruction = seg.imlList[i];
- IMLUsedRegisters registersUsed;
- imlInstruction.CheckRegisterUsage(&registersUsed);
- if (registersUsed.IsBaseGPRWritten(reg))
- return i;
- }
- return -1;
-}
-
-// returns true if the instruction can safely be moved while keeping ordering constraints and data dependencies intact
-// initialIndex is inclusive, targetIndex is exclusive
-bool IMLUtil_CanMoveInstructionTo(IMLSegment& seg, sint32 initialIndex, sint32 targetIndex)
-{
- boost::container::static_vector regsWritten;
- boost::container::static_vector regsRead;
- // get list of read and written registers
- IMLUsedRegisters registersUsed;
- seg.imlList[initialIndex].CheckRegisterUsage(&registersUsed);
- registersUsed.ForEachAccessedGPR([&](IMLReg reg, bool isWritten) {
- if (isWritten)
- regsWritten.push_back(reg.GetRegID());
- else
- regsRead.push_back(reg.GetRegID());
- });
- // check all the instructions inbetween
- if(initialIndex < targetIndex)
- {
- sint32 scanStartIndex = initialIndex+1; // +1 to skip the moving instruction itself
- sint32 scanEndIndex = targetIndex;
- for (sint32 i = scanStartIndex; i < scanEndIndex; i++)
- {
- IMLUsedRegisters registersUsed;
- seg.imlList[i].CheckRegisterUsage(&registersUsed);
- // in order to be able to move an instruction past another instruction, any of the read registers must not be modified (written)
- // and any of it's written registers must not be read
- bool canMove = true;
- registersUsed.ForEachAccessedGPR([&](IMLReg reg, bool isWritten) {
- IMLRegID regId = reg.GetRegID();
- if (!isWritten)
- canMove = canMove && std::find(regsWritten.begin(), regsWritten.end(), regId) == regsWritten.end();
- else
- canMove = canMove && std::find(regsRead.begin(), regsRead.end(), regId) == regsRead.end();
- });
- if(!canMove)
- return false;
- }
- }
- else
- {
- cemu_assert_unimplemented(); // backwards scan is todo
- return false;
- }
- return true;
-}
-
-sint32 IMLUtil_CountRegisterReadsInRange(IMLSegment& seg, sint32 scanStartIndex, sint32 scanEndIndex, IMLRegID regId)
-{
- cemu_assert_debug(scanStartIndex <= scanEndIndex);
- cemu_assert_debug(scanEndIndex < seg.imlList.size());
- sint32 count = 0;
- for (sint32 i = scanStartIndex; i <= scanEndIndex; i++)
- {
- IMLUsedRegisters registersUsed;
- seg.imlList[i].CheckRegisterUsage(&registersUsed);
- registersUsed.ForEachReadGPR([&](IMLReg reg) {
- if (reg.GetRegID() == regId)
- count++;
- });
- }
- return count;
-}
-
-// move instruction from one index to another
-// instruction will be inserted before the instruction at targetIndex
-// returns the new instruction index of the moved instruction
-sint32 IMLUtil_MoveInstructionTo(IMLSegment& seg, sint32 initialIndex, sint32 targetIndex)
-{
- cemu_assert_debug(initialIndex != targetIndex);
- IMLInstruction temp = seg.imlList[initialIndex];
- if (initialIndex < targetIndex)
- {
- cemu_assert_debug(targetIndex > 0);
- targetIndex--;
- for(size_t i=initialIndex; i regsNeeded = regIoAnalysis.GetRegistersNeededAtEndOfSegment(seg);
-
- // start with suffix instruction
- if(seg.HasSuffixInstruction())
- {
- IMLInstruction& imlInstruction = seg.imlList[seg.GetSuffixInstructionIndex()];
- IMLUsedRegisters registersUsed;
- imlInstruction.CheckRegisterUsage(&registersUsed);
- registersUsed.ForEachWrittenGPR([&](IMLReg reg) {
- regsNeeded.erase(reg.GetRegID());
- });
- registersUsed.ForEachReadGPR([&](IMLReg reg) {
- regsNeeded.insert(reg.GetRegID());
- });
- }
- // iterate instructions backwards
- for (sint32 i = seg.imlList.size() - (seg.HasSuffixInstruction() ? 2:1); i >= 0; i--)
- {
- IMLInstruction& imlInstruction = seg.imlList[i];
- IMLUsedRegisters registersUsed;
- imlInstruction.CheckRegisterUsage(&registersUsed);
- // register read -> remove from overwritten list
- // register written -> add to overwritten list
-
- // check if this instruction only writes registers which will never be read
- bool onlyWritesRedundantRegisters = true;
- registersUsed.ForEachWrittenGPR([&](IMLReg reg) {
- if (regsNeeded.contains(reg.GetRegID()))
- onlyWritesRedundantRegisters = false;
- });
- // check if any of the written registers are read after this point
- registersUsed.ForEachWrittenGPR([&](IMLReg reg) {
- regsNeeded.erase(reg.GetRegID());
- });
- registersUsed.ForEachReadGPR([&](IMLReg reg) {
- regsNeeded.insert(reg.GetRegID());
- });
- if(!imlInstruction.HasSideEffects() && onlyWritesRedundantRegisters)
- {
- imlInstruction.make_no_op();
- }
- }
-}
-
-// Rewrites a segment-ending conditional jump on a bool register into an x86 eflags based jump.
-// Nothing is changed unless the suffix instruction is a PPCREC_IML_TYPE_CONDITIONAL_JUMP and the
-// instruction found as writer of its bool register is a COMPARE or COMPARE_S32.
-// When the bool register is read at most once in the (inclusive) index range from the compare to the jump
-// and is not needed at the end of the segment, the compare itself is replaced by PPCREC_IML_OP_X86_CMP.
-void IMLOptimizerX86_SubstituteCJumpForEflagsJump(IMLOptimizerRegIOAnalysis& regIoAnalysis, IMLSegment& seg)
-{
- // convert and optimize bool condition jumps to eflags condition jumps
- // - Moves eflag setter (e.g. cmp) closer to eflags consumer (conditional jump) if necessary. If not possible but required then exit early
- // - Since we only rely on eflags, the boolean register can be optimized out if DCE considers it unused
- // - Further detect and optimize patterns like DEC + CMP + JCC into fused ops (todo)
-
- // check if this segment ends with a conditional jump
- if(!seg.HasSuffixInstruction())
- return;
- sint32 cjmpInstIndex = seg.GetSuffixInstructionIndex();
- if(cjmpInstIndex < 0)
- return;
- IMLInstruction& cjumpInstr = seg.imlList[cjmpInstIndex];
- if( cjumpInstr.type != PPCREC_IML_TYPE_CONDITIONAL_JUMP )
- return;
- IMLReg regCondBool = cjumpInstr.op_conditional_jump.registerBool;
- bool invertedCondition = !cjumpInstr.op_conditional_jump.mustBeTrue;
- // find the instruction which sets the bool
- // NOTE(review): the last argument (20) is presumably a limit on how far back the search goes - confirm against IMLUtil_FindInstructionWhichWritesRegister
- sint32 cmpInstrIndex = IMLUtil_FindInstructionWhichWritesRegister(seg, cjmpInstIndex-1, regCondBool, 20);
- if(cmpInstrIndex < 0)
- return;
- // check if its an instruction combo which can be optimized (currently only cmp + cjump) and get the condition
- IMLInstruction& condSetterInstr = seg.imlList[cmpInstrIndex];
- IMLCondition cond;
- if(condSetterInstr.type == PPCREC_IML_TYPE_COMPARE)
- cond = condSetterInstr.op_compare.cond;
- else if(condSetterInstr.type == PPCREC_IML_TYPE_COMPARE_S32)
- cond = condSetterInstr.op_compare_s32.cond;
- else
- return;
- // check if instructions inbetween modify eflags
- // the scan runs backwards from the jump, so the first hit is the eflags-modifying instruction closest to the jump
- sint32 indexEflagsSafeStart = -1; // index of the first instruction which does not modify eflags up to cjump
- for(sint32 i = cjmpInstIndex-1; i > cmpInstrIndex; i--)
- {
- if(IMLOptimizerX86_ModifiesEFlags(seg.imlList[i]))
- {
- indexEflagsSafeStart = i+1;
- break;
- }
- }
- if(indexEflagsSafeStart >= 0)
- {
- cemu_assert(indexEflagsSafeStart > 0);
- // there are eflags-modifying instructions inbetween the bool setter and cjump
- // try to move the eflags setter close enough to the cjump (to indexEflagsSafeStart)
- bool canMove = IMLUtil_CanMoveInstructionTo(seg, cmpInstrIndex, indexEflagsSafeStart);
- if(!canMove)
- {
- return;
- }
- else
- {
- // the move returns the new index of the compare, all later accesses must use the updated index
- cmpInstrIndex = IMLUtil_MoveInstructionTo(seg, cmpInstrIndex, indexEflagsSafeStart);
- }
- }
- // we can turn the jump into an eflags jump
- cjumpInstr.make_x86_eflags_jcc(cond, invertedCondition);
-
- // note that the jump has already been rewritten at this point, only the compare is left untouched by the early return below
- if (IMLUtil_CountRegisterReadsInRange(seg, cmpInstrIndex, cjmpInstIndex, regCondBool.GetRegID()) > 1 || regIoAnalysis.IsRegisterNeededAtEndOfSegment(seg, regCondBool.GetRegID()))
- return; // bool register is used beyond the CMP, we can't drop it
-
- // look the compare up again by index since cmpInstrIndex may have been updated by the move above
- auto& cmpInstr = seg.imlList[cmpInstrIndex];
- cemu_assert_debug(cmpInstr.type == PPCREC_IML_TYPE_COMPARE || cmpInstr.type == PPCREC_IML_TYPE_COMPARE_S32);
- if(cmpInstr.type == PPCREC_IML_TYPE_COMPARE)
- {
- // operands are copied out first because make_r_r is invoked on the very same instruction
- IMLReg regA = cmpInstr.op_compare.regA;
- IMLReg regB = cmpInstr.op_compare.regB;
- seg.imlList[cmpInstrIndex].make_r_r(PPCREC_IML_OP_X86_CMP, regA, regB);
- }
- else
- {
- IMLReg regA = cmpInstr.op_compare_s32.regA;
- sint32 val = cmpInstr.op_compare_s32.immS32;
- seg.imlList[cmpInstrIndex].make_r_s32(PPCREC_IML_OP_X86_CMP, regA, val);
- }
-
-}
-
-// Standard optimization pipeline for a single segment:
-// dead code removal first, then (x86-64 builds only) the conditional jump -> eflags jump substitution.
-void IMLOptimizer_StandardOptimizationPassForSegment(IMLOptimizerRegIOAnalysis& regIoAnalysis, IMLSegment& seg)
-{
-    // architecture independent part
-    IMLOptimizer_RemoveDeadCodeFromSegment(regIoAnalysis, seg);
-#ifdef ARCH_X86_64
-    // x86 specific part
-    // must stay last: it introduces eflags dependencies which are invisible to (and would break) any register dependency analysis that runs afterwards
-    IMLOptimizerX86_SubstituteCJumpForEflagsJump(regIoAnalysis, seg);
-#endif
-}
-
-// Applies the standard per-segment optimization pass to every segment of the context.
-// The register IO analysis is constructed and its dependencies computed once, then shared by all segments.
-void IMLOptimizer_StandardOptimizationPass(ppcImlGenContext_t& ppcImlGenContext)
-{
-    auto& segments = ppcImlGenContext.segmentList2;
-    IMLOptimizerRegIOAnalysis ioAnalysis(segments, ppcImlGenContext.GetMaxRegId());
-    ioAnalysis.ComputeDepedencies();
-    for (IMLSegment* segment : segments)
-        IMLOptimizer_StandardOptimizationPassForSegment(ioAnalysis, *segment);
-}
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp
deleted file mode 100644
index 935e61ac..00000000
--- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp
+++ /dev/null
@@ -1,2204 +0,0 @@
-#include "IML.h"
-
-#include "../PPCRecompiler.h"
-#include "../PPCRecompilerIml.h"
-#include "IMLRegisterAllocator.h"
-#include "IMLRegisterAllocatorRanges.h"
-
-#include "../BackendX64/BackendX64.h"
-#ifdef __aarch64__
-#include "../BackendAArch64/BackendAArch64.h"
-#endif
-
-#include
-#include
-
-#include "Common/cpu_features.h"
-
-#define DEBUG_RA_EXTRA_VALIDATION 0 // if set to non-zero, additional expensive validation checks will be performed
-#define DEBUG_RA_INSTRUCTION_GEN 0
-
-// Preliminary liveness info. One entry per register and segment.
-// The tracked instruction indices form the half-open range [usageStart, usageEnd).
-struct IMLRARegAbstractLiveness
-{
-    // The initializer list is written in member declaration order, which is the order
-    // in which the members are actually initialized regardless of how the list is spelled.
-    IMLRARegAbstractLiveness(IMLRegFormat regBaseFormat, sint32 usageStart, sint32 usageEnd)
-        : usageStart(usageStart), usageEnd(usageEnd), regBaseFormat(regBaseFormat) {}
-
-    // Widens the range so that it includes the instruction at the given index.
-    void TrackInstruction(sint32 index)
-    {
-        usageStart = std::min(usageStart, index);
-        usageEnd = std::max(usageEnd, index + 1); // exclusive index
-    }
-
-    sint32 usageStart; // first tracked instruction index (inclusive)
-    sint32 usageEnd; // one past the last tracked instruction index
-    bool isProcessed{false};
-    IMLRegFormat regBaseFormat;
-};
-
-// State shared by the register allocator passes: the allocator parameters, the (deprecated) generator context
-// and per-register / per-segment lookup tables.
-struct IMLRegisterAllocatorContext
-{
- IMLRegisterAllocatorParameters* raParam;
- ppcImlGenContext_t* deprGenContext; // deprecated. Try to decouple IMLRA from other parts of IML/PPCRec
-
- // queried by register id in GetBaseFormatByRegId(), the mapped value is the register's base format
- std::unordered_map regIdToBaseFormat;
- // first pass
- // one map per segment, addressed by the segment's momentaryIndex (see GetSegmentAbstractRangeMap)
- std::vector> perSegmentAbstractRanges;
-
- // helper methods
- // returns the abstract range map belonging to imlSegment. No bounds check is performed on momentaryIndex
- inline std::unordered_map& GetSegmentAbstractRangeMap(IMLSegment* imlSegment)
- {
- return perSegmentAbstractRanges[imlSegment->momentaryIndex];
- }
-
- // returns the base format registered for regId. A missing entry is only caught by the debug assert
- inline IMLRegFormat GetBaseFormatByRegId(IMLRegID regId) const
- {
- auto it = regIdToBaseFormat.find(regId);
- cemu_assert_debug(it != regIdToBaseFormat.cend());
- return it->second;
- }
-};
-
-// Fixed physical register requirements of a single instruction, kept separately for its input and output edge.
-struct IMLFixedRegisters
-{
- // pairs a virtual register with the set of physical registers it may use.
- // Callers in this file also pass IMLREG_INVALID as reg, in which case the set describes registers which are blocked / reserved
- struct Entry
- {
- Entry(IMLReg reg, IMLPhysRegisterSet physRegSet)
- : reg(reg), physRegSet(physRegSet) {}
-
- IMLReg reg;
- IMLPhysRegisterSet physRegSet;
- };
- boost::container::small_vector listInput; // fixed register requirements for instruction input edge
- boost::container::small_vector listOutput; // fixed register requirements for instruction output edge
-};
-
-// Translates the parameters and return value of a call instruction (op_call_imm) into fixed register requirements.
-// - each valid parameter register is pinned on the input edge to the next integer (I64) or float (F64) parameter register
-// - a valid return register is pinned on the output edge to intReturnPhysReg or floatReturnPhysReg
-// - volatileRegisters (minus the return register that is in use) is added on the output edge with IMLREG_INVALID
-// volatileRegisters is taken by value, the caller's set is not modified.
-static void SetupCallingConvention(const IMLInstruction* instruction, IMLFixedRegisters& fixedRegs, const IMLPhysReg intParamToPhysReg[3], const IMLPhysReg floatParamToPhysReg[3], const IMLPhysReg intReturnPhysReg, const IMLPhysReg floatReturnPhysReg, IMLPhysRegisterSet volatileRegisters)
-{
-    const auto& call = instruction->op_call_imm;
-    sint32 intParamCount = 0;
-    sint32 floatParamCount = 0;
-
-    // pins a single parameter to the next unused parameter register of its class
-    auto pinParameter = [&](IMLReg reg) {
-        if (!reg.IsValid())
-            return;
-        const IMLRegFormat format = reg.GetBaseFormat();
-        if (format != IMLRegFormat::I64 && format != IMLRegFormat::F64)
-        {
-            // only I64 and F64 parameters are expected here
-            cemu_assert_suspicious();
-            return;
-        }
-        IMLPhysRegisterSet paramRegSet;
-        if (format == IMLRegFormat::I64)
-            paramRegSet.SetAvailable(intParamToPhysReg[intParamCount++]);
-        else
-            paramRegSet.SetAvailable(floatParamToPhysReg[floatParamCount++]);
-        fixedRegs.listInput.emplace_back(reg, paramRegSet);
-    };
-    pinParameter(call.regParam0);
-    pinParameter(call.regParam1);
-    pinParameter(call.regParam2);
-
-    // return value
-    if (call.regReturn.IsValid())
-    {
-        bool returnsInteger;
-        switch (call.regReturn.GetBaseFormat())
-        {
-        case IMLRegFormat::I64:
-        case IMLRegFormat::I32:
-        case IMLRegFormat::I16:
-        case IMLRegFormat::I8:
-            returnsInteger = true;
-            break;
-        default:
-            returnsInteger = false;
-            break;
-        }
-        const IMLPhysReg returnPhysReg = returnsInteger ? intReturnPhysReg : floatReturnPhysReg;
-        IMLPhysRegisterSet returnRegSet;
-        returnRegSet.SetAvailable(returnPhysReg);
-        // the return register carries a value after the call, so it is taken out of the blocked set
-        volatileRegisters.SetReserved(returnPhysReg);
-        fixedRegs.listOutput.emplace_back(call.regReturn, returnRegSet);
-    }
-    // block volatile registers from being used on the output edge, this makes the register allocator store them during the call
-    fixedRegs.listOutput.emplace_back(IMLREG_INVALID, volatileRegisters);
-}
-
-#if defined(__aarch64__)
-// aarch64
-// AArch64 variant. Fills fixedRegs with the physical register constraints of the given instruction.
-// Both lists are always cleared first. Only PPCREC_IML_TYPE_CALL_IMM produces constraints here
-// (parameter, return and volatile registers of the calling convention).
-static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs)
-{
-    fixedRegs.listInput.clear();
-    fixedRegs.listOutput.clear();
-
-    // the purpose of this function is to constrain virtual registers to specific physical registers for instructions which need it
-    // for this backend that is only the case for calls
-    if (instruction->type != PPCREC_IML_TYPE_CALL_IMM)
-        return;
-
-    const IMLPhysReg intParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_GPR_BASE + 1, IMLArchAArch64::PHYSREG_GPR_BASE + 2};
-    const IMLPhysReg floatParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_FPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 1, IMLArchAArch64::PHYSREG_FPR_BASE + 2};
-
-    IMLPhysRegisterSet volatileRegs;
-    // marks base + first .. base + last (inclusive) as volatile
-    auto markVolatileRange = [&volatileRegs](auto base, int first, int last) {
-        for (int regIndex = first; regIndex <= last; ++regIndex)
-            volatileRegs.SetAvailable(base + regIndex);
-    };
-    // x0 to x17 are volatile
-    markVolatileRange(IMLArchAArch64::PHYSREG_GPR_BASE, 0, 17);
-    // v0-v7 & v16-v31 are volatile. For v8-v15 only the high 64 bits are volatile.
-    markVolatileRange(IMLArchAArch64::PHYSREG_FPR_BASE, 0, 7);
-    markVolatileRange(IMLArchAArch64::PHYSREG_FPR_BASE, 16, 31);
-
-    SetupCallingConvention(instruction, fixedRegs, intParamToPhysReg, floatParamToPhysReg, IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 0, volatileRegs);
-}
-#else
-// x86-64
-// x86-64 variant. Fills fixedRegs with the physical register constraints of the given instruction.
-// Both lists are always cleared first, instructions without constraints leave them empty.
-static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs)
-{
-    fixedRegs.listInput.clear();
-    fixedRegs.listOutput.clear();
-
-    const auto instrType = instruction->type;
-    if (instrType == PPCREC_IML_TYPE_R_R_R)
-    {
-        const auto op = instruction->operation;
-        const bool isShiftOp = op == PPCREC_IML_OP_LEFT_SHIFT || op == PPCREC_IML_OP_RIGHT_SHIFT_S || op == PPCREC_IML_OP_RIGHT_SHIFT_U;
-        // when BMI2 is not available, regB of a shift is pinned to ECX
-        if (isShiftOp && !g_CPUFeatures.x86.bmi2)
-        {
-            IMLPhysRegisterSet ecxOnly;
-            ecxOnly.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_ECX);
-            fixedRegs.listInput.emplace_back(instruction->op_r_r_r.regB, ecxOnly);
-        }
-        return;
-    }
-    if (instrType == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
-    {
-        IMLPhysRegisterSet eaxOnly;
-        eaxOnly.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_EAX);
-        fixedRegs.listInput.emplace_back(IMLREG_INVALID, eaxOnly); // none of the inputs may use EAX
-        fixedRegs.listOutput.emplace_back(instruction->op_atomic_compare_store.regBoolOut, eaxOnly); // but we output to EAX
-        return;
-    }
-    if (instrType != PPCREC_IML_TYPE_CALL_IMM)
-        return;
-
-    // calls: parameters in RCX, RDX, R8 (integer) and FPR 0-2 (float), return value in EAX / FPR 0
-    const IMLPhysReg intParamToPhysReg[3] = {IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8};
-    const IMLPhysReg floatParamToPhysReg[3] = {IMLArchX86::PHYSREG_FPR_BASE + 0, IMLArchX86::PHYSREG_FPR_BASE + 1, IMLArchX86::PHYSREG_FPR_BASE + 2};
-
-    IMLPhysRegisterSet volatileRegs;
-    const int volatileGprs[] = {X86_REG_RAX, X86_REG_RCX, X86_REG_RDX, X86_REG_R8, X86_REG_R9, X86_REG_R10, X86_REG_R11};
-    for (const int gpr : volatileGprs)
-        volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + gpr);
-    // YMM0-YMM5 are volatile
-    // for YMM6-YMM15 only the upper 128 bits are volatile which we dont use
-    for (int fpr = 0; fpr < 6; ++fpr)
-        volatileRegs.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + fpr);
-
-    SetupCallingConvention(instruction, fixedRegs, intParamToPhysReg, floatParamToPhysReg, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_EAX, IMLArchX86::PHYSREG_FPR_BASE + 0, volatileRegs);
-}
-#endif
-
-// Returns a fresh iteration index on every call. The counter is a function-local static which starts
-// at 0 and is incremented before being returned, so the first value handed out is 1.
-uint32 IMLRA_GetNextIterationIndex()
-{
-    static uint32 s_iterationCounter = 0;
-    return ++s_iterationCounter;
-}
-
-// Recursive helper for IMLRA_DetectLoop.
-// Returns true if imlSegmentLoopBase can be reached from currentSegment by following successor links.
-// Every segment found to be on such a path gets its loopDepth incremented.
-// - iterationIndex tags segments visited during the current search, the cached result of an already visited segment is reused
-// - depth is the current recursion depth, the search is cut off once it reaches 9
-// - only successors with a larger momentaryIndex than the current segment are followed
-bool _detectLoop(IMLSegment* currentSegment, sint32 depth, uint32 iterationIndex, IMLSegment* imlSegmentLoopBase)
-{
- if (currentSegment == imlSegmentLoopBase)
- return true;
- // already visited during this search -> reuse the stored result
- if (currentSegment->raInfo.lastIterationIndex == iterationIndex)
- return currentSegment->raInfo.isPartOfProcessedLoop;
- if (depth >= 9)
- return false;
- // mark as visited before recursing so that re-entering this segment returns the (preliminary) stored result
- currentSegment->raInfo.lastIterationIndex = iterationIndex;
- currentSegment->raInfo.isPartOfProcessedLoop = false;
-
- if (currentSegment->nextSegmentIsUncertain)
- return false;
- if (currentSegment->nextSegmentBranchNotTaken)
- {
- if (currentSegment->nextSegmentBranchNotTaken->momentaryIndex > currentSegment->momentaryIndex)
- {
- currentSegment->raInfo.isPartOfProcessedLoop |= _detectLoop(currentSegment->nextSegmentBranchNotTaken, depth + 1, iterationIndex, imlSegmentLoopBase);
- }
- }
- if (currentSegment->nextSegmentBranchTaken)
- {
- if (currentSegment->nextSegmentBranchTaken->momentaryIndex > currentSegment->momentaryIndex)
- {
- currentSegment->raInfo.isPartOfProcessedLoop |= _detectLoop(currentSegment->nextSegmentBranchTaken, depth + 1, iterationIndex, imlSegmentLoopBase);
- }
- }
- // both successors are always explored (no short-circuit) so that every segment on a path to the loop base is counted
- if (currentSegment->raInfo.isPartOfProcessedLoop)
- currentSegment->loopDepth++;
- return currentSegment->raInfo.isPartOfProcessedLoop;
-}
-
-// Starts a loop search at the branch-taken successor of imlSegmentLoopBase.
-// If the search leads back to imlSegmentLoopBase, the loopDepth of the base segment is incremented
-// (segments on the path are handled by _detectLoop itself).
-// nextSegmentBranchTaken is dereferenced by the search without a null check, so it must be set.
-void IMLRA_DetectLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegmentLoopBase)
-{
-    const uint32 searchTag = IMLRA_GetNextIterationIndex();
-    // tag the base segment as visited for this search
-    imlSegmentLoopBase->raInfo.lastIterationIndex = searchTag;
-    const bool leadsBackToBase = _detectLoop(imlSegmentLoopBase->nextSegmentBranchTaken, 0, searchTag, imlSegmentLoopBase);
-    if (leadsBackToBase)
-        imlSegmentLoopBase->loopDepth++;
-}
-
-// Checks whether imlSegment closes a loop and updates loopDepth accordingly.
-// Segments with an uncertain successor are ignored.
-void IMLRA_IdentifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
-{
-    if (imlSegment->nextSegmentIsUncertain)
-        return;
-    IMLSegment* const branchTarget = imlSegment->nextSegmentBranchTaken;
-    // tight loop: the segment branches to itself
-    if (branchTarget == imlSegment)
-    {
-        imlSegment->loopDepth++;
-        return;
-    }
-    // a branch that goes backwards is a potential complex loop and needs the full search
-    const bool branchesBackwards = branchTarget && branchTarget->momentaryIndex < imlSegment->momentaryIndex;
-    if (branchesBackwards)
-        IMLRA_DetectLoop(ppcImlGenContext, imlSegment);
-}
-
-#define SUBRANGE_LIST_SIZE (128)
-
-// Returns the raw edge distance from startPosition to the first access location of subrange
-// which lies at or after startPosition. If there is none, the constant 10001 * 2 is returned.
-sint32 IMLRA_CountDistanceUntilNextUse(raLivenessRange* subrange, raInstructionEdge startPosition)
-{
-    for (auto& access : subrange->list_accessLocations)
-    {
-        if (access.pos >= startPosition)
-        {
-            cemu_assert_debug(access.IsRead() != access.IsWrite()); // an access location can be either read or write
-            cemu_assert_debug(!startPosition.ConnectsToPreviousSegment() && !startPosition.ConnectsToNextSegment());
-            return access.pos.GetRaw() - startPosition.GetRaw();
-        }
-    }
-    // no further use. The fallback value is only larger than any real distance while segments stay below 10000 instructions
-    cemu_assert_debug(subrange->imlSegment->imlList.size() < 10000);
-    return 10001 * 2;
-}
-
-// Looks for the first fixed register requirement of range at or after startPosition which allows physRegister.
-// - found: hasFixedAccess is set to true and the raw distance from startPosition to that requirement is returned
-// - not found: hasFixedAccess is set to false and the raw distance from startPosition to the end of the range is returned
-sint32 IMLRA_CountDistanceUntilFixedRegUsageInRange(IMLSegment* imlSegment, raLivenessRange* range, raInstructionEdge startPosition, sint32 physRegister, bool& hasFixedAccess)
-{
-    hasFixedAccess = false;
-    cemu_assert_debug(startPosition.IsInstructionIndex());
-    for (auto& requirement : range->list_fixedRegRequirements)
-    {
-        const bool isMatch = !(requirement.pos < startPosition) && requirement.allowedReg.IsAvailable(physRegister);
-        if (!isMatch)
-            continue;
-        hasFixedAccess = true;
-        return requirement.pos.GetRaw() - startPosition.GetRaw();
-    }
-    cemu_assert_debug(range->interval.end.IsInstructionIndex());
-    return range->interval.end.GetRaw() - startPosition.GetRaw();
-}
-
-// Scans the instruction edges from startPosition onwards and returns the raw edge distance to the first edge
-// on which physRegister is claimed by a fixed register entry that does not belong to ourRegId
-// (this includes IMLREG_INVALID entries which mark reserved registers).
-// The scan is limited to maxDistance edges and to the end of the segment. If nothing is found, the distance
-// to that limit is returned.
-sint32 IMLRA_CountDistanceUntilFixedRegUsage(IMLSegment* imlSegment, raInstructionEdge startPosition, sint32 maxDistance, IMLRegID ourRegId, sint32 physRegister)
-{
- cemu_assert_debug(startPosition.IsInstructionIndex());
- raInstructionEdge lastPos2;
- lastPos2.Set(imlSegment->imlList.size(), false);
-
- // clamp the scan range to the segment
- raInstructionEdge endPos;
- endPos = startPosition + maxDistance;
- if (endPos > lastPos2)
- endPos = lastPos2;
- IMLFixedRegisters fixedRegs;
- // when starting on an output edge the loop below will not see the input edge of that instruction, so its fixed registers are fetched up front
- if (startPosition.IsOnOutputEdge())
- GetInstructionFixedRegisters(imlSegment->imlList.data() + startPosition.GetInstructionIndex(), fixedRegs);
- for (raInstructionEdge currentPos = startPosition; currentPos <= endPos; ++currentPos)
- {
- // fixedRegs is refreshed on each input edge and reused for the following output edge of the same instruction
- // NOTE(review): depending on what lastPos2 encodes, this may query an instruction index equal to imlList.size() - confirm against raInstructionEdge::Set
- if (currentPos.IsOnInputEdge())
- {
- GetInstructionFixedRegisters(imlSegment->imlList.data() + currentPos.GetInstructionIndex(), fixedRegs);
- }
- auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput;
- for (auto& fixedRegLoc : fixedRegAccess)
- {
- // entries for our own register are skipped, they do not conflict with us
- if (fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId)
- {
- cemu_assert_debug(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.physRegSet.HasExactlyOneAvailable()); // this whole function only makes sense when there is only one fixed register, otherwise there are extra permutations to consider. Except for IMLREG_INVALID which is used to indicate reserved registers
- if (fixedRegLoc.physRegSet.IsAvailable(physRegister))
- return currentPos.GetRaw() - startPosition.GetRaw();
- }
- }
- }
- return endPos.GetRaw() - startPosition.GetRaw();
-}
-
-// Returns the raw edge distance from startPosition to the start of the nearest subrange of this segment
-// which has physRegister assigned. Returns 0 if such a subrange already contains startPosition.
-// If no subrange uses the register at or after startPosition, the distance to the end of the segment is returned.
-sint32 PPCRecRA_countDistanceUntilNextLocalPhysRegisterUse(IMLSegment* imlSegment, raInstructionEdge startPosition, sint32 physRegister)
-{
-    cemu_assert_debug(startPosition.IsInstructionIndex());
-    // upper bound: remaining edges until the end of the segment (two edges per instruction)
-    sint32 nearest = (sint32)imlSegment->imlList.size() * 2 - startPosition.GetRaw();
-    for (raLivenessRange* candidate = imlSegment->raInfo.linkedList_allSubranges; candidate; candidate = candidate->link_allSegmentRanges.next)
-    {
-        if (candidate->GetPhysicalRegister() != physRegister)
-            continue;
-        if (candidate->interval.ContainsEdge(startPosition))
-            return 0; // register is in use right now
-        if (candidate->interval.end < startPosition)
-            continue; // range lies entirely before startPosition
-        cemu_assert_debug(startPosition <= candidate->interval.start);
-        const sint32 distanceToStart = candidate->interval.start.GetRaw() - startPosition.GetRaw();
-        if (distanceToStart < nearest)
-            nearest = distanceToStart;
-    }
-    return nearest;
-}
-
-// Keeps track of the set of currently active liveness ranges while positions are processed in order.
-// Ranges are added manually via AddActiveRange() and retired via ExpireRanges().
-struct IMLRALivenessTimeline
-{
- IMLRALivenessTimeline()
- {
- }
-
- // manually add an active range
- void AddActiveRange(raLivenessRange* subrange)
- {
- activeRanges.emplace_back(subrange);
- }
-
- // moves every active range whose interval ends before expireUpTo into the expired list.
- // The expired list is cleared at the start of each call, so GetExpiredRanges() only reflects the most recent call.
- // The order of the remaining active ranges is not preserved
- void ExpireRanges(raInstructionEdge expireUpTo)
- {
- expiredRanges.clear();
- size_t count = activeRanges.size();
- for (size_t f = 0; f < count; f++)
- {
- raLivenessRange* liverange = activeRanges[f];
- if (liverange->interval.end < expireUpTo) // this was <= but since end is not inclusive we need to use <
- {
-#ifdef CEMU_DEBUG_ASSERT
- if (!expireUpTo.ConnectsToNextSegment() && (liverange->subrangeBranchTaken || liverange->subrangeBranchNotTaken))
- assert_dbg(); // infinite subranges should not expire
-#endif
- expiredRanges.emplace_back(liverange);
- // remove entry
- // swap-remove: the last element takes the freed slot and the same index is visited again.
- // For f == 0 the decrement wraps around (size_t) and the f++ of the loop brings it back to 0
- activeRanges[f] = activeRanges[count - 1];
- f--;
- count--;
- }
- }
- if (count != activeRanges.size())
- activeRanges.resize(count);
- }
-
- // view of the ranges retired by the last ExpireRanges() call. Invalidated by the next call
- std::span GetExpiredRanges()
- {
- return {expiredRanges.data(), expiredRanges.size()};
- }
-
- // view of the currently active ranges
- std::span GetActiveRanges()
- {
- return {activeRanges.data(), activeRanges.size()};
- }
-
- // linear search, returns the first active range with the given virtual register or nullptr
- raLivenessRange* GetActiveRangeByVirtualRegId(IMLRegID regId)
- {
- for (auto& it : activeRanges)
- if (it->virtualRegister == regId)
- return it;
- return nullptr;
- }
-
- // linear search, returns the first active range with the given physical register or nullptr
- raLivenessRange* GetActiveRangeByPhysicalReg(sint32 physReg)
- {
- cemu_assert_debug(physReg >= 0);
- for (auto& it : activeRanges)
- if (it->physicalRegister == physReg)
- return it;
- return nullptr;
- }
-
- boost::container::small_vector activeRanges;
-
- private:
- boost::container::small_vector expiredRanges;
-};
-
-// For every subrange in the cluster of range2: marks the physical registers of all other ranges in the same
-// segment which overlap that subrange as reserved in physRegSet. Ranges without an assigned physical register are ignored.
-void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange* range2, IMLPhysRegisterSet& physRegSet)
-{
-    auto clusterRanges = range2->GetAllSubrangesInCluster();
-    for (auto& subrange : clusterRanges)
-    {
-        IMLSegment* segment = subrange->imlSegment;
-        for (raLivenessRange* other = segment->raInfo.linkedList_allSubranges; other; other = other->link_allSegmentRanges.next)
-        {
-            if (subrange == other)
-                continue; // a range does not conflict with itself
-            if (!subrange->interval.IsOverlapping(other->interval))
-                continue;
-            const auto occupiedReg = other->GetPhysicalRegister();
-            if (occupiedReg >= 0)
-                physRegSet.SetReserved(occupiedReg);
-        }
-    }
-}
-
-// Ordering predicate for sorting: lhs comes before rhs if its interval starts earlier.
-bool _livenessRangeStartCompare(raLivenessRange* lhs, raLivenessRange* rhs)
-{
-    auto& lhsStart = lhs->interval.start;
-    auto& rhsStart = rhs->interval.start;
-    return lhsStart < rhsStart;
-}
-
-// Sorts the segment's linked list of all subranges by interval start (ascending).
-// The list is copied into a temporary array, sorted and then relinked (prev/next and the list head).
-// At most 4096 subranges per segment are supported, this is enforced by an assert.
-void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment)
-{
-    raLivenessRange* subrangeList[4096 + 1]; // one extra slot for the terminating nullptr
-    sint32 count = 0;
-    // disassemble linked list
-    raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
-    while (subrangeItr)
-    {
-        cemu_assert(count < 4096);
-        subrangeList[count] = subrangeItr;
-        count++;
-        // next
-        subrangeItr = subrangeItr->link_allSegmentRanges.next;
-    }
-    if (count == 0)
-    {
-        imlSegment->raInfo.linkedList_allSubranges = nullptr;
-        return;
-    }
-    // sort
-    std::sort(subrangeList, subrangeList + count, _livenessRangeStartCompare);
-    // reassemble linked list
-    // the nullptr sentinel lets the last element pick up its next pointer without a special case
-    subrangeList[count] = nullptr;
-    imlSegment->raInfo.linkedList_allSubranges = subrangeList[0];
-    subrangeList[0]->link_allSegmentRanges.prev = nullptr;
-    subrangeList[0]->link_allSegmentRanges.next = subrangeList[1];
-    for (sint32 i = 1; i < count; i++)
-    {
-        subrangeList[i]->link_allSegmentRanges.prev = subrangeList[i - 1];
-        subrangeList[i]->link_allSegmentRanges.next = subrangeList[i + 1];
-    }
-    // validate list
-#if DEBUG_RA_EXTRA_VALIDATION
-    // uses the same 'interval' member as the comparator (the former 'interval2' name is not used anywhere outside of validation code)
-    sint32 count2 = 0;
-    subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
-    raInstructionEdge currentStartPosition;
-    currentStartPosition.SetRaw(RA_INTER_RANGE_START);
-    while (subrangeItr)
-    {
-        count2++;
-        if (subrangeItr->interval.start < currentStartPosition)
-            assert_dbg();
-        currentStartPosition = subrangeItr->interval.start;
-        // next
-        subrangeItr = subrangeItr->link_allSegmentRanges.next;
-    }
-    if (count != count2)
-        assert_dbg();
-#endif
-}
-
-// Returns a reference to the segment's per-virtual-register subrange map (raInfo.linkedList_perVirtualRegister).
-std::unordered_map& IMLRA_GetSubrangeMap(IMLSegment* imlSegment)
-{
- return imlSegment->raInfo.linkedList_perVirtualRegister;
-}
-
-// Looks up the subrange stored for regId in the segment's per-virtual-register map.
-// Returns nullptr if the segment has no entry for this register.
-raLivenessRange* IMLRA_GetSubrange(IMLSegment* imlSegment, IMLRegID regId)
-{
-    auto& perRegisterMap = imlSegment->raInfo.linkedList_perVirtualRegister;
-    auto found = perRegisterMap.find(regId);
-    if (found != perRegisterMap.end())
-        return found->second;
-    return nullptr;
-}
-
-// A fixed register requirement at a specific instruction edge together with the virtual register it applies to.
-// Producers in this file store IMLRegID_INVALID in regId for entries which are not tied to a virtual register.
-struct raFixedRegRequirementWithVGPR
-{
- raFixedRegRequirementWithVGPR(raInstructionEdge pos, IMLPhysRegisterSet allowedReg, IMLRegID regId)
- : pos(pos), allowedReg(allowedReg), regId(regId) {}
-
- raInstructionEdge pos; // instruction edge the requirement applies to
- IMLPhysRegisterSet allowedReg; // set of physical registers taken from the fixed register entry
- IMLRegID regId; // virtual register id or IMLRegID_INVALID
-};
-
-// Collects the fixed register requirements of all instructions of the segment into one flat list, in instruction order.
-// For each instruction, the entries of listInput are recorded at the position created by pos.Set(index, true)
-// and the entries of listOutput at the position directly after it (pos + 1).
-// Entries without a valid register are stored with IMLRegID_INVALID.
-std::vector IMLRA_BuildSegmentInstructionFixedRegList(IMLSegment* imlSegment)
-{
- std::vector frrList;
- size_t index = 0;
- while (index < imlSegment->imlList.size())
- {
- IMLFixedRegisters fixedRegs;
- GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs);
- raInstructionEdge pos;
- pos.Set(index, true);
- for (auto& fixedRegAccess : fixedRegs.listInput)
- {
- frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid() ? fixedRegAccess.reg.GetRegID() : IMLRegID_INVALID);
- }
- // advance to the next edge of the same instruction for the output requirements
- pos = pos + 1;
- for (auto& fixedRegAccess : fixedRegs.listOutput)
- {
- frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid() ? fixedRegAccess.reg.GetRegID() : IMLRegID_INVALID);
- }
- index++;
- }
- return frrList;
-}
-
-// Returns all ranges of the segment which contain the edge pos and for which GetAllowedRegistersEx() succeeds
-// with a register set that includes physReg.
-boost::container::small_vector IMLRA_GetRangeWithFixedRegReservationOverlappingPos(IMLSegment* imlSegment, raInstructionEdge pos, IMLPhysReg physReg)
-{
- boost::container::small_vector rangeList;
- for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
- {
- if (!currentRange->interval.ContainsEdge(pos))
- continue;
- IMLPhysRegisterSet allowedRegs;
- // ranges for which GetAllowedRegistersEx() returns false are skipped
- if (!currentRange->GetAllowedRegistersEx(allowedRegs))
- continue;
- if (allowedRegs.IsAvailable(physReg))
- rangeList.emplace_back(currentRange);
- }
- return rangeList;
-}
-
-// Resolves the fixed register requirements of a segment in several passes:
-// 1) ranges with fixed register requirements which cross the segment border are exploded (IMLRA_ExplodeRangeCluster)
-// 2) ranges whose requirements leave no allowed register are detected (not implemented, asserts)
-// 3) ranges are split at positions where the virtual register bound to a fixed physical register changes
-// 4) every range with fixed register requirements gets its single allowed physical register assigned
-void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
-{
-    // first pass - iterate over all ranges with fixed register requirements and split them if they cross the segment border
-    // todo - this pass currently creates suboptimal results by splitting all ranges that cross the segment border if they have any fixed register requirement. This can be avoided in some cases
-    for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange;)
-    {
-        IMLPhysRegisterSet allowedRegs;
-        if (currentRange->list_fixedRegRequirements.empty())
-        {
-            currentRange = currentRange->link_allSegmentRanges.next;
-            continue; // since we run this pass for every segment we dont need to do global checks here for clusters which may not even have fixed register requirements
-        }
-        if (!currentRange->GetAllowedRegistersEx(allowedRegs))
-        {
-            currentRange = currentRange->link_allSegmentRanges.next;
-            continue;
-        }
-        if (currentRange->interval.ExtendsPreviousSegment() || currentRange->interval.ExtendsIntoNextSegment())
-        {
-            // the successor is fetched before exploding, iteration continues from there afterwards
-            raLivenessRange* nextRange = currentRange->link_allSegmentRanges.next;
-            IMLRA_ExplodeRangeCluster(ppcImlGenContext, currentRange);
-            currentRange = nextRange;
-            continue;
-        }
-        currentRange = currentRange->link_allSegmentRanges.next;
-    }
-    // second pass - look for ranges with conflicting fixed register requirements and split these too (locally)
-    for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
-    {
-        IMLPhysRegisterSet allowedRegs;
-        if (currentRange->list_fixedRegRequirements.empty())
-            continue; // we dont need to check whole clusters because the pass above guarantees that there are no ranges with fixed register requirements that extend outside of this segment
-        if (!currentRange->GetAllowedRegistersEx(allowedRegs))
-            continue;
-        if (allowedRegs.HasAnyAvailable())
-            continue;
-        // no register satisfies all requirements of this range, splitting for this case is not implemented
-        cemu_assert_unimplemented();
-    }
-    // third pass - assign fixed registers, split ranges if needed
-    auto frr = IMLRA_BuildSegmentInstructionFixedRegList(imlSegment);
-    // remembers for each physical register the virtual register of the most recent fixed requirement
-    std::unordered_map<IMLPhysReg, IMLRegID> lastVGPR;
-    for (size_t i = 0; i < frr.size(); i++)
-    {
-        raFixedRegRequirementWithVGPR& entry = frr[i];
-        // we currently only handle fixed register requirements with a single register
-        // with one exception: When regId is IMLRegID_INVALID then the entry acts as a list of reserved registers
-        cemu_assert_debug(entry.regId == IMLRegID_INVALID || entry.allowedReg.HasExactlyOneAvailable());
-        for (IMLPhysReg physReg = entry.allowedReg.GetFirstAvailableReg(); physReg >= 0; physReg = entry.allowedReg.GetNextAvailableReg(physReg + 1))
-        {
-            // check if the assigned vGPR has changed
-            bool vgprHasChanged = false;
-            auto it = lastVGPR.find(physReg);
-            if (it != lastVGPR.end())
-                vgprHasChanged = it->second != entry.regId;
-            else
-                vgprHasChanged = true;
-            lastVGPR[physReg] = entry.regId;
-
-            if (!vgprHasChanged)
-                continue;
-
-            auto overlappingRanges = IMLRA_GetRangeWithFixedRegReservationOverlappingPos(imlSegment, entry.pos, physReg);
-            if (entry.regId != IMLRegID_INVALID)
-                cemu_assert_debug(!overlappingRanges.empty()); // there should always be at least one range that overlaps corresponding to the fixed register requirement, except for IMLRegID_INVALID which is used to indicate reserved registers
-
-            // ranges which started before this position are split here
-            for (auto& range : overlappingRanges)
-            {
-                if (range->interval.start < entry.pos)
-                {
-                    IMLRA_SplitRange(ppcImlGenContext, range, entry.pos, true);
-                }
-            }
-        }
-    }
-    // finally iterate ranges and assign fixed registers
-    for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
-    {
-        IMLPhysRegisterSet allowedRegs;
-        if (currentRange->list_fixedRegRequirements.empty())
-            continue; // we dont need to check whole clusters because the pass above guarantees that there are no ranges with fixed register requirements that extend outside of this segment
-        if (!currentRange->GetAllowedRegistersEx(allowedRegs))
-        {
-            // not expected for a range with fixed register requirements, the assert flags this case in debug builds
-            cemu_assert_debug(currentRange->list_fixedRegRequirements.empty());
-            continue;
-        }
-        cemu_assert_debug(allowedRegs.HasExactlyOneAvailable());
-        currentRange->SetPhysicalRegister(allowedRegs.GetFirstAvailableReg());
-    }
-    // DEBUG - check for collisions and make sure all ranges with fixed register requirements got their physical register assigned
-#if DEBUG_RA_EXTRA_VALIDATION
-    // uses the same 'interval' member as the passes above
-    for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
-    {
-        if (!currentRange->HasPhysicalRegister())
-            continue;
-        for (raLivenessRange* currentRange2 = imlSegment->raInfo.linkedList_allSubranges; currentRange2; currentRange2 = currentRange2->link_allSegmentRanges.next)
-        {
-            if (currentRange == currentRange2)
-                continue;
-            if (currentRange->interval.IsOverlapping(currentRange2->interval))
-            {
-                cemu_assert_debug(currentRange->GetPhysicalRegister() != currentRange2->GetPhysicalRegister());
-            }
-        }
-    }
-#endif
-}
-
-// Adjusts pos so that it is a safe position for splitting a range.
-// Ranges should not be split on instructions with tied registers (i.e. where a register encoded as a single
-// parameter is both input and output), otherwise the RA algorithm would have to assign both ranges the same
-// physical register (not supported yet) and the point of splitting to fit another range is nullified.
-// The instruction itself is currently not inspected: a position on an output edge is always moved back by one edge.
-void IMLRA_MakeSafeSplitPosition(IMLSegment* imlSegment, raInstructionEdge& pos)
-{
-    cemu_assert_debug(pos.IsInstructionIndex());
-    if (!pos.IsOnOutputEdge())
-        return; // nothing to adjust
-    pos = pos - 1;
-}
-
-// Convenience wrapper for IMLRA_MakeSafeSplitPosition which operates on a distance relative to startPos.
-// On return, distance is the raw distance from startPos to the adjusted split position,
-// or 0 if the adjusted position would lie before startPos.
-void IMLRA_MakeSafeSplitDistance(IMLSegment* imlSegment, raInstructionEdge startPos, sint32& distance)
-{
-    cemu_assert_debug(startPos.IsInstructionIndex());
-    cemu_assert_debug(distance >= 0);
-    raInstructionEdge splitPos = startPos + distance;
-    IMLRA_MakeSafeSplitPosition(imlSegment, splitPos);
-    if (splitPos < startPos)
-        distance = 0;
-    else
-        distance = splitPos.GetRaw() - startPos.GetRaw();
-}
-
-static void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx);
-
-// Abstract base class for spill strategies. A strategy carries a cost and can be applied to a range via Apply().
-class RASpillStrategy
-{
- public:
- // performs the strategy for currentRange within imlSegment. Implemented by the derived strategies
- virtual void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) = 0;
-
- // returns the current cost value. INT_MAX is the value set by ResetCost()
- sint32 GetCost()
- {
- return strategyCost;
- }
-
- protected:
- void ResetCost()
- {
- strategyCost = INT_MAX;
- }
-
- // not initialized on construction, it only has a defined value after ResetCost() or an assignment by a derived class
- sint32 strategyCost;
-};
-
-class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy
-{
- public:
- void Reset()
- {
- localRangeHoleCutting.distance = -1;
- localRangeHoleCutting.largestHoleSubrange = nullptr;
- ResetCost();
- }
-
- void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs)
- {
- raInstructionEdge currentRangeStart = currentRange->interval.start;
- sint32 requiredSize2 = currentRange->interval.GetPreciseDistance();
- cemu_assert_debug(localRangeHoleCutting.distance == -1);
- cemu_assert_debug(strategyCost == INT_MAX);
- if (!currentRangeStart.ConnectsToPreviousSegment())
- {
- cemu_assert_debug(currentRangeStart.GetRaw() >= 0);
- for (auto candidate : timeline.activeRanges)
- {
- if (candidate->interval.ExtendsIntoNextSegment())
- continue;
- // new checks (Oct 2024):
- if (candidate == currentRange)
- continue;
- if (candidate->GetPhysicalRegister() < 0)
- continue;
- if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister()))
- continue;
-
- sint32 distance2 = IMLRA_CountDistanceUntilNextUse(candidate, currentRangeStart);
- IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance2);
- if (distance2 < 2)
- continue;
- cemu_assert_debug(currentRangeStart.IsInstructionIndex());
- distance2 = std::min(distance2, imlSegment->imlList.size() * 2 - currentRangeStart.GetRaw()); // limit distance to end of segment
- // calculate split cost of candidate
- sint32 cost = IMLRA_CalculateAdditionalCostAfterSplit(candidate, currentRangeStart + distance2);
- // calculate additional split cost of currentRange if hole is not large enough
- if (distance2 < requiredSize2)
- {
- cost += IMLRA_CalculateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance2);
- // we also slightly increase cost in relation to the remaining length (in order to make the algorithm prefer larger holes)
- cost += (requiredSize2 - distance2) / 10;
- }
- // compare cost with previous candidates
- if (cost < strategyCost)
- {
- strategyCost = cost;
- localRangeHoleCutting.distance = distance2;
- localRangeHoleCutting.largestHoleSubrange = candidate;
- }
- }
- }
- }
-
- void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override
- {
- cemu_assert_debug(strategyCost != INT_MAX);
- sint32 requiredSize2 = currentRange->interval.GetPreciseDistance();
- raInstructionEdge currentRangeStart = currentRange->interval.start;
-
- raInstructionEdge holeStartPosition = currentRangeStart;
- raInstructionEdge holeEndPosition = currentRangeStart + localRangeHoleCutting.distance;
- raLivenessRange* collisionRange = localRangeHoleCutting.largestHoleSubrange;
-
- if (collisionRange->interval.start < holeStartPosition)
- {
- collisionRange = IMLRA_SplitRange(nullptr, collisionRange, holeStartPosition, true);
- cemu_assert_debug(!collisionRange || collisionRange->interval.start >= holeStartPosition); // verify if splitting worked at all, tail must be on or after the split point
- cemu_assert_debug(!collisionRange || collisionRange->interval.start >= holeEndPosition); // also verify that the trimmed hole is actually big enough
- }
- else
- {
- cemu_assert_unimplemented(); // we still need to trim?
- }
- // we may also have to cut the current range to fit partially into the hole
- if (requiredSize2 > localRangeHoleCutting.distance)
- {
- raLivenessRange* tailRange = IMLRA_SplitRange(nullptr, currentRange, currentRangeStart + localRangeHoleCutting.distance, true);
- if (tailRange)
- {
- cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers
- tailRange->UnsetPhysicalRegister();
- }
- }
- // verify that the hole is large enough
- if (collisionRange)
- {
- cemu_assert_debug(!collisionRange->interval.IsOverlapping(currentRange->interval));
- }
- }
-
- private:
- struct
- {
- sint32 distance;
- raLivenessRange* largestHoleSubrange;
- } localRangeHoleCutting;
-};
-
-class RASpillStrategy_AvailableRegisterHole : public RASpillStrategy
-{
- // split current range (this is generally only a good choice when the current range is long but has few usages)
- public:
- void Reset()
- {
- ResetCost();
- availableRegisterHole.distance = -1;
- availableRegisterHole.physRegister = -1;
- }
-
- void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& localAvailableRegsMask, const IMLPhysRegisterSet& allowedRegs)
- {
- sint32 requiredSize2 = currentRange->interval.GetPreciseDistance();
-
- raInstructionEdge currentRangeStart = currentRange->interval.start;
- cemu_assert_debug(strategyCost == INT_MAX);
- availableRegisterHole.distance = -1;
- availableRegisterHole.physRegister = -1;
- if (currentRangeStart.GetRaw() >= 0)
- {
- if (localAvailableRegsMask.HasAnyAvailable())
- {
- sint32 physRegItr = -1;
- while (true)
- {
- physRegItr = localAvailableRegsMask.GetNextAvailableReg(physRegItr + 1);
- if (physRegItr < 0)
- break;
- if (!allowedRegs.IsAvailable(physRegItr))
- continue;
- // get size of potential hole for this register
- sint32 distance = PPCRecRA_countDistanceUntilNextLocalPhysRegisterUse(imlSegment, currentRangeStart, physRegItr);
-
- // some instructions may require the same register for another range, check the distance here
- sint32 distUntilFixedReg = IMLRA_CountDistanceUntilFixedRegUsage(imlSegment, currentRangeStart, distance, currentRange->GetVirtualRegister(), physRegItr);
- if (distUntilFixedReg < distance)
- distance = distUntilFixedReg;
-
- IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance);
- if (distance < 2)
- continue;
- // calculate additional cost due to split
- cemu_assert_debug(distance < requiredSize2); // should always be true otherwise previous step would have selected this register?
- sint32 cost = IMLRA_CalculateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance);
- // add small additional cost for the remaining range (prefer larger holes)
- cost += ((requiredSize2 - distance) / 2) / 10;
- if (cost < strategyCost)
- {
- strategyCost = cost;
- availableRegisterHole.distance = distance;
- availableRegisterHole.physRegister = physRegItr;
- }
- }
- }
- }
- }
-
- void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override
- {
- cemu_assert_debug(strategyCost != INT_MAX);
- raInstructionEdge currentRangeStart = currentRange->interval.start;
- // use available register
- raLivenessRange* tailRange = IMLRA_SplitRange(nullptr, currentRange, currentRangeStart + availableRegisterHole.distance, true);
- if (tailRange)
- {
- cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers
- tailRange->UnsetPhysicalRegister();
- }
- }
-
- private:
- struct
- {
- sint32 physRegister;
- sint32 distance; // size of hole
- } availableRegisterHole;
-};
-
-class RASpillStrategy_ExplodeRange : public RASpillStrategy
-{
- public:
- void Reset()
- {
- ResetCost();
- explodeRange.range = nullptr;
- explodeRange.distance = -1;
- }
-
- void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs)
- {
- raInstructionEdge currentRangeStart = currentRange->interval.start;
- if (currentRangeStart.ConnectsToPreviousSegment())
- currentRangeStart.Set(0, true);
- sint32 requiredSize2 = currentRange->interval.GetPreciseDistance();
- cemu_assert_debug(strategyCost == INT_MAX);
- explodeRange.range = nullptr;
- explodeRange.distance = -1;
- for (auto candidate : timeline.activeRanges)
- {
- if (!candidate->interval.ExtendsIntoNextSegment())
- continue;
- // new checks (Oct 2024):
- if (candidate == currentRange)
- continue;
- if (candidate->GetPhysicalRegister() < 0)
- continue;
- if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister()))
- continue;
-
- sint32 distance = IMLRA_CountDistanceUntilNextUse(candidate, currentRangeStart);
- IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance);
- if (distance < 2)
- continue;
- sint32 cost = IMLRA_CalculateAdditionalCostOfRangeExplode(candidate);
- // if the hole is not large enough, add cost of splitting current subrange
- if (distance < requiredSize2)
- {
- cost += IMLRA_CalculateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance);
- // add small additional cost for the remaining range (prefer larger holes)
- cost += ((requiredSize2 - distance) / 2) / 10;
- }
- // compare with current best candidate for this strategy
- if (cost < strategyCost)
- {
- strategyCost = cost;
- explodeRange.distance = distance;
- explodeRange.range = candidate;
- }
- }
- }
-
- void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override
- {
- raInstructionEdge currentRangeStart = currentRange->interval.start;
- if (currentRangeStart.ConnectsToPreviousSegment())
- currentRangeStart.Set(0, true);
- sint32 requiredSize2 = currentRange->interval.GetPreciseDistance();
- // explode range
- IMLRA_ExplodeRangeCluster(nullptr, explodeRange.range);
- // split current subrange if necessary
- if (requiredSize2 > explodeRange.distance)
- {
- raLivenessRange* tailRange = IMLRA_SplitRange(nullptr, currentRange, currentRangeStart + explodeRange.distance, true);
- if (tailRange)
- {
- cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers
- tailRange->UnsetPhysicalRegister();
- }
- }
- }
-
- private:
- struct
- {
- raLivenessRange* range;
- sint32 distance; // size of hole
- // note: If we explode a range, we still have to check the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange
- } explodeRange;
-};
-
-class RASpillStrategy_ExplodeRangeInter : public RASpillStrategy
-{
- public:
- void Reset()
- {
- ResetCost();
- explodeRange.range = nullptr;
- explodeRange.distance = -1;
- }
-
- void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs)
- {
- // explode the range with the least cost
- cemu_assert_debug(strategyCost == INT_MAX);
- cemu_assert_debug(explodeRange.range == nullptr && explodeRange.distance == -1);
- for (auto candidate : timeline.activeRanges)
- {
- if (!candidate->interval.ExtendsIntoNextSegment())
- continue;
- // only select candidates that clash with current subrange
- if (candidate->GetPhysicalRegister() < 0 && candidate != currentRange)
- continue;
- // and also filter any that dont meet fixed register requirements
- if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister()))
- continue;
- sint32 cost;
- cost = IMLRA_CalculateAdditionalCostOfRangeExplode(candidate);
- // compare with current best candidate for this strategy
- if (cost < strategyCost)
- {
- strategyCost = cost;
- explodeRange.distance = INT_MAX;
- explodeRange.range = candidate;
- }
- }
- // add current range as a candidate too
- sint32 ownCost;
- ownCost = IMLRA_CalculateAdditionalCostOfRangeExplode(currentRange);
- if (ownCost < strategyCost)
- {
- strategyCost = ownCost;
- explodeRange.distance = INT_MAX;
- explodeRange.range = currentRange;
- }
- }
-
- void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override
- {
- cemu_assert_debug(strategyCost != INT_MAX);
- IMLRA_ExplodeRangeCluster(ctx, explodeRange.range);
- }
-
- private:
- struct
- {
- raLivenessRange* range;
- sint32 distance; // size of hole
- // note: If we explode a range, we still have to check the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange
- }explodeRange;
-};
-
-// filter any registers from candidatePhysRegSet which cannot be used by currentRange due to fixed register requirements within the range that it occupies
-void IMLRA_FilterReservedFixedRegisterRequirementsForSegment(IMLRegisterAllocatorContext& ctx, raLivenessRange* currentRange, IMLPhysRegisterSet& candidatePhysRegSet)
-{
- IMLSegment* seg = currentRange->imlSegment;
- if (seg->imlList.empty())
- return; // there can be no fixed register requirements if there are no instructions
-
- raInstructionEdge firstPos = currentRange->interval.start;
- if (currentRange->interval.start.ConnectsToPreviousSegment())
- firstPos.SetRaw(0);
- else if (currentRange->interval.start.ConnectsToNextSegment())
- firstPos.Set(seg->imlList.size() - 1, false);
-
- raInstructionEdge lastPos = currentRange->interval.end;
- if (currentRange->interval.end.ConnectsToPreviousSegment())
- lastPos.SetRaw(0);
- else if (currentRange->interval.end.ConnectsToNextSegment())
- lastPos.Set(seg->imlList.size() - 1, false);
- cemu_assert_debug(firstPos <= lastPos);
-
- IMLRegID ourRegId = currentRange->GetVirtualRegister();
-
- IMLFixedRegisters fixedRegs;
- if (firstPos.IsOnOutputEdge())
- GetInstructionFixedRegisters(seg->imlList.data() + firstPos.GetInstructionIndex(), fixedRegs);
- for (raInstructionEdge currentPos = firstPos; currentPos <= lastPos; ++currentPos)
- {
- if (currentPos.IsOnInputEdge())
- {
- GetInstructionFixedRegisters(seg->imlList.data() + currentPos.GetInstructionIndex(), fixedRegs);
- }
- auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput;
- for (auto& fixedRegLoc : fixedRegAccess)
- {
- if (fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId)
- candidatePhysRegSet.RemoveRegisters(fixedRegLoc.physRegSet);
- }
- }
-}
-
-// filter out any registers along the range cluster
-void IMLRA_FilterReservedFixedRegisterRequirementsForCluster(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, raLivenessRange* currentRange, IMLPhysRegisterSet& candidatePhysRegSet)
-{
- cemu_assert_debug(currentRange->imlSegment == imlSegment);
- if (currentRange->interval.ExtendsPreviousSegment() || currentRange->interval.ExtendsIntoNextSegment())
- {
- auto clusterRanges = currentRange->GetAllSubrangesInCluster();
- for (auto& rangeIt : clusterRanges)
- {
- IMLRA_FilterReservedFixedRegisterRequirementsForSegment(ctx, rangeIt, candidatePhysRegSet);
- if (!candidatePhysRegSet.HasAnyAvailable())
- break;
- }
- return;
- }
- IMLRA_FilterReservedFixedRegisterRequirementsForSegment(ctx, currentRange, candidatePhysRegSet);
-}
-
-bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
-{
- // sort subranges ascending by start index
- _sortSegmentAllSubrangesLinkedList(imlSegment);
-
- IMLRALivenessTimeline livenessTimeline;
- raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
- raInstructionEdge lastInstructionEdge;
- lastInstructionEdge.SetRaw(RA_INTER_RANGE_END);
-
- struct
- {
- RASpillStrategy_LocalRangeHoleCutting localRangeHoleCutting;
- RASpillStrategy_AvailableRegisterHole availableRegisterHole;
- RASpillStrategy_ExplodeRange explodeRange;
- // for ranges that connect to follow up segments:
- RASpillStrategy_ExplodeRangeInter explodeRangeInter;
- } strategy;
-
- while (subrangeItr)
- {
- raInstructionEdge currentRangeStart = subrangeItr->interval.start; // used to be currentIndex before refactor
- PPCRecRA_debugValidateSubrange(subrangeItr);
-
- livenessTimeline.ExpireRanges((currentRangeStart > lastInstructionEdge) ? lastInstructionEdge : currentRangeStart); // expire up to currentIndex (inclusive), but exclude infinite ranges
-
- // if subrange already has register assigned then add it to the active list and continue
- if (subrangeItr->GetPhysicalRegister() >= 0)
- {
- // verify if register is actually available
-#if DEBUG_RA_EXTRA_VALIDATION
- for (auto& liverangeItr : livenessTimeline.activeRanges)
- {
- // check for register mismatch
- cemu_assert_debug(liverangeItr->GetPhysicalRegister() != subrangeItr->GetPhysicalRegister());
- }
-#endif
- livenessTimeline.AddActiveRange(subrangeItr);
- subrangeItr = subrangeItr->link_allSegmentRanges.next;
- continue;
- }
- // ranges with fixed register requirements should already have a phys register assigned
- if (!subrangeItr->list_fixedRegRequirements.empty())
- {
- cemu_assert_debug(subrangeItr->HasPhysicalRegister());
- }
- // find free register for current subrangeItr and segment
- IMLRegFormat regBaseFormat = ctx.GetBaseFormatByRegId(subrangeItr->GetVirtualRegister());
- IMLPhysRegisterSet candidatePhysRegSet = ctx.raParam->GetPhysRegPool(regBaseFormat);
- cemu_assert_debug(candidatePhysRegSet.HasAnyAvailable()); // no valid pool provided for this register type
-
- IMLPhysRegisterSet allowedRegs = subrangeItr->GetAllowedRegisters(candidatePhysRegSet);
- cemu_assert_debug(allowedRegs.HasAnyAvailable()); // if zero regs are available, then this range needs to be split to avoid mismatching register requirements (do this in the initial pass to keep the code here simpler)
- candidatePhysRegSet &= allowedRegs;
-
- for (auto& liverangeItr : livenessTimeline.activeRanges)
- {
- cemu_assert_debug(liverangeItr->GetPhysicalRegister() >= 0);
- candidatePhysRegSet.SetReserved(liverangeItr->GetPhysicalRegister());
- }
- // check intersections with other ranges and determine allowed registers
- IMLPhysRegisterSet localAvailableRegsMask = candidatePhysRegSet; // mask of registers that are currently not used (does not include range checks in other segments)
- if (candidatePhysRegSet.HasAnyAvailable())
- {
- // check for overlaps on a global scale (subrangeItr can be part of a larger range cluster across multiple segments)
- PPCRecRA_MaskOverlappingPhysRegForGlobalRange(subrangeItr, candidatePhysRegSet);
- }
- // some target instructions may enforce specific registers (e.g. common on X86 where something like SHL , CL forces CL as the count register)
- // we determine the list of allowed registers here
- // this really only works if we assume single-register requirements (otherwise its better not to filter out early and instead allow register corrections later but we don't support this yet)
- if (candidatePhysRegSet.HasAnyAvailable())
- {
- IMLRA_FilterReservedFixedRegisterRequirementsForCluster(ctx, imlSegment, subrangeItr, candidatePhysRegSet);
- }
- if (candidatePhysRegSet.HasAnyAvailable())
- {
- // use free register
- subrangeItr->SetPhysicalRegisterForCluster(candidatePhysRegSet.GetFirstAvailableReg());
- livenessTimeline.AddActiveRange(subrangeItr);
- subrangeItr = subrangeItr->link_allSegmentRanges.next; // next
- continue;
- }
- // there is no free register for the entire range
- // evaluate different strategies of splitting ranges to free up another register or shorten the current range
- strategy.localRangeHoleCutting.Reset();
- strategy.availableRegisterHole.Reset();
- strategy.explodeRange.Reset();
- // cant assign register
- // there might be registers available, we just can't use them due to range conflicts
- RASpillStrategy* selectedStrategy = nullptr;
- auto SelectStrategyIfBetter = [&selectedStrategy](RASpillStrategy& newStrategy) {
- if (newStrategy.GetCost() == INT_MAX)
- return;
- if (selectedStrategy == nullptr || newStrategy.GetCost() < selectedStrategy->GetCost())
- selectedStrategy = &newStrategy;
- };
-
- if (!subrangeItr->interval.ExtendsIntoNextSegment())
- {
- // range ends in current segment, use local strategies
- // evaluate strategy: Cut hole into local subrange
- strategy.localRangeHoleCutting.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs);
- SelectStrategyIfBetter(strategy.localRangeHoleCutting);
- // evaluate strategy: Split current range to fit in available holes
- // todo - are checks required to avoid splitting on the suffix instruction?
- strategy.availableRegisterHole.Evaluate(imlSegment, subrangeItr, livenessTimeline, localAvailableRegsMask, allowedRegs);
- SelectStrategyIfBetter(strategy.availableRegisterHole);
- // evaluate strategy: Explode inter-segment ranges
- strategy.explodeRange.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs);
- SelectStrategyIfBetter(strategy.explodeRange);
- }
- else // if subrangeItr->interval2.ExtendsIntoNextSegment()
- {
- strategy.explodeRangeInter.Reset();
- strategy.explodeRangeInter.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs);
- SelectStrategyIfBetter(strategy.explodeRangeInter);
- }
- // choose strategy
- if (selectedStrategy)
- {
- selectedStrategy->Apply(ppcImlGenContext, imlSegment, subrangeItr);
- }
- else
- {
- // none of the evulated strategies can be applied, this should only happen if the segment extends into the next segment(s) for which we have no good strategy
- cemu_assert_debug(subrangeItr->interval.ExtendsPreviousSegment());
- // alternative strategy if we have no other choice: explode current range
- IMLRA_ExplodeRangeCluster(ppcImlGenContext, subrangeItr);
- }
- return false;
- }
- return true;
-}
-
-void IMLRA_AssignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext)
-{
- // start with frequently executed segments first
- sint32 maxLoopDepth = 0;
- for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
- {
- maxLoopDepth = std::max(maxLoopDepth, segIt->loopDepth);
- }
- // assign fixed registers first
- for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
- IMLRA_HandleFixedRegisters(ppcImlGenContext, segIt);
-#if DEBUG_RA_EXTRA_VALIDATION
- // fixed registers are currently handled per-segment, but here we validate that they are assigned correctly on a global scope as well
- for (IMLSegment* imlSegment : ppcImlGenContext->segmentList2)
- {
- for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
- {
- IMLPhysRegisterSet allowedRegs;
- if (!currentRange->GetAllowedRegistersEx(allowedRegs))
- {
- cemu_assert_debug(currentRange->list_fixedRegRequirements.empty());
- continue;
- }
- cemu_assert_debug(currentRange->HasPhysicalRegister() && allowedRegs.IsAvailable(currentRange->GetPhysicalRegister()));
- }
- }
-#endif
-
- while (true)
- {
- bool done = false;
- for (sint32 d = maxLoopDepth; d >= 0; d--)
- {
- for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
- {
- if (segIt->loopDepth != d)
- continue;
- done = IMLRA_AssignSegmentRegisters(ctx, ppcImlGenContext, segIt);
- if (done == false)
- break;
- }
- if (done == false)
- break;
- }
- if (done)
- break;
- }
-}
-
-void IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext)
-{
- // insert empty segments after every non-taken branch if the linked segment has more than one input
- // this gives the register allocator more room to create efficient spill code
- size_t segmentIndex = 0;
- while (segmentIndex < ppcImlGenContext->segmentList2.size())
- {
- IMLSegment* imlSegment = ppcImlGenContext->segmentList2[segmentIndex];
- if (imlSegment->nextSegmentIsUncertain)
- {
- segmentIndex++;
- continue;
- }
- if (imlSegment->nextSegmentBranchTaken == nullptr || imlSegment->nextSegmentBranchNotTaken == nullptr)
- {
- segmentIndex++;
- continue;
- }
- if (imlSegment->nextSegmentBranchNotTaken->list_prevSegments.size() <= 1)
- {
- segmentIndex++;
- continue;
- }
- if (imlSegment->nextSegmentBranchNotTaken->isEnterable)
- {
- segmentIndex++;
- continue;
- }
- PPCRecompilerIml_insertSegments(ppcImlGenContext, segmentIndex + 1, 1);
- IMLSegment* imlSegmentP0 = ppcImlGenContext->segmentList2[segmentIndex + 0];
- IMLSegment* imlSegmentP1 = ppcImlGenContext->segmentList2[segmentIndex + 1];
- IMLSegment* nextSegment = imlSegment->nextSegmentBranchNotTaken;
- IMLSegment_RemoveLink(imlSegmentP0, nextSegment);
- IMLSegment_SetLinkBranchNotTaken(imlSegmentP1, nextSegment);
- IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1);
- segmentIndex++;
- }
- // detect loops
- for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++)
- {
- IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s];
- imlSegment->momentaryIndex = s;
- }
- for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++)
- {
- IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s];
- IMLRA_IdentifyLoop(ppcImlGenContext, imlSegment);
- }
-}
-
-IMLRARegAbstractLiveness* _GetAbstractRange(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId)
-{
- auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
- auto it = segMap.find(regId);
- return it != segMap.end() ? &it->second : nullptr;
-}
-
-// scan instructions and establish register usage range for segment
-void IMLRA_CalculateSegmentMinMaxAbstractRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
-{
- size_t instructionIndex = 0;
- IMLUsedRegisters gprTracking;
- auto& segDistMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
- while (instructionIndex < imlSegment->imlList.size())
- {
- imlSegment->imlList[instructionIndex].CheckRegisterUsage(&gprTracking);
- gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) {
- IMLRegID gprId = gprReg.GetRegID();
- auto it = segDistMap.find(gprId);
- if (it == segDistMap.end())
- {
- segDistMap.try_emplace(gprId, gprReg.GetBaseFormat(), (sint32)instructionIndex, (sint32)instructionIndex + 1);
- ctx.regIdToBaseFormat.try_emplace(gprId, gprReg.GetBaseFormat());
- }
- else
- {
- it->second.TrackInstruction(instructionIndex);
-#ifdef CEMU_DEBUG_ASSERT
- cemu_assert_debug(ctx.regIdToBaseFormat[gprId] == gprReg.GetBaseFormat()); // the base type per register always has to be the same
-#endif
- }
- });
- instructionIndex++;
- }
-}
-
-void IMLRA_CalculateLivenessRanges(IMLRegisterAllocatorContext& ctx)
-{
- // for each register calculate min/max index of usage range within each segment
- size_t dbgIndex = 0;
- for (IMLSegment* segIt : ctx.deprGenContext->segmentList2)
- {
- cemu_assert_debug(segIt->momentaryIndex == dbgIndex);
- IMLRA_CalculateSegmentMinMaxAbstractRanges(ctx, segIt);
- dbgIndex++;
- }
-}
-
-raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID vGPR, IMLName name)
-{
- IMLRARegAbstractLiveness* abstractRange = _GetAbstractRange(ctx, imlSegment, vGPR);
- if (!abstractRange)
- return nullptr;
- if (abstractRange->isProcessed)
- {
- // return already existing segment
- raLivenessRange* existingRange = IMLRA_GetSubrange(imlSegment, vGPR);
- cemu_assert_debug(existingRange);
- return existingRange;
- }
- abstractRange->isProcessed = true;
- // create subrange
- cemu_assert_debug(IMLRA_GetSubrange(imlSegment, vGPR) == nullptr);
- cemu_assert_debug(
- (abstractRange->usageStart == abstractRange->usageEnd && (abstractRange->usageStart == RA_INTER_RANGE_START || abstractRange->usageStart == RA_INTER_RANGE_END)) ||
- abstractRange->usageStart < abstractRange->usageEnd); // usageEnd is exclusive so it should always be larger
- sint32 inclusiveEnd = abstractRange->usageEnd;
- if (inclusiveEnd != RA_INTER_RANGE_START && inclusiveEnd != RA_INTER_RANGE_END)
- inclusiveEnd--; // subtract one, because usageEnd is exclusive, but the end value of the interval passed to createSubrange is inclusive
- raInterval interval;
- interval.SetInterval(abstractRange->usageStart, true, inclusiveEnd, true);
- raLivenessRange* subrange = IMLRA_CreateRange(ctx.deprGenContext, imlSegment, vGPR, name, interval.start, interval.end);
- // traverse forward
- if (abstractRange->usageEnd == RA_INTER_RANGE_END)
- {
- if (imlSegment->nextSegmentBranchTaken)
- {
- IMLRARegAbstractLiveness* branchTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchTaken, vGPR);
- if (branchTakenRange && branchTakenRange->usageStart == RA_INTER_RANGE_START)
- {
- subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchTaken, vGPR, name);
- subrange->subrangeBranchTaken->previousRanges.push_back(subrange);
- cemu_assert_debug(subrange->subrangeBranchTaken->interval.ExtendsPreviousSegment());
- }
- }
- if (imlSegment->nextSegmentBranchNotTaken)
- {
- IMLRARegAbstractLiveness* branchNotTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR);
- if (branchNotTakenRange && branchNotTakenRange->usageStart == RA_INTER_RANGE_START)
- {
- subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR, name);
- subrange->subrangeBranchNotTaken->previousRanges.push_back(subrange);
- cemu_assert_debug(subrange->subrangeBranchNotTaken->interval.ExtendsPreviousSegment());
- }
- }
- }
- // traverse backward
- if (abstractRange->usageStart == RA_INTER_RANGE_START)
- {
- for (auto& it : imlSegment->list_prevSegments)
- {
- IMLRARegAbstractLiveness* prevRange = _GetAbstractRange(ctx, it, vGPR);
- if (!prevRange)
- continue;
- if (prevRange->usageEnd == RA_INTER_RANGE_END)
- PPCRecRA_convertToMappedRanges(ctx, it, vGPR, name);
- }
- }
- return subrange;
-}
-
-void IMLRA_UpdateOrAddSubrangeLocation(raLivenessRange* subrange, raInstructionEdge pos)
-{
- if (subrange->list_accessLocations.empty())
- {
- subrange->list_accessLocations.emplace_back(pos);
- return;
- }
- if(subrange->list_accessLocations.back().pos == pos)
- return;
- cemu_assert_debug(subrange->list_accessLocations.back().pos < pos);
- subrange->list_accessLocations.emplace_back(pos);
-}
-
-// take abstract range data and create LivenessRanges
-void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
-{
- const std::unordered_map& regToSubrange = IMLRA_GetSubrangeMap(imlSegment);
-
- auto AddOrUpdateFixedRegRequirement = [&](IMLRegID regId, sint32 instructionIndex, bool isInput, const IMLPhysRegisterSet& physRegSet) {
- raLivenessRange* subrange = regToSubrange.find(regId)->second;
- cemu_assert_debug(subrange);
- raFixedRegRequirement tmp;
- tmp.pos.Set(instructionIndex, isInput);
- tmp.allowedReg = physRegSet;
- if (subrange->list_fixedRegRequirements.empty() || subrange->list_fixedRegRequirements.back().pos != tmp.pos)
- subrange->list_fixedRegRequirements.push_back(tmp);
- };
-
- // convert abstract min-max ranges to liveness range objects
- auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
- for (auto& it : segMap)
- {
- if (it.second.isProcessed)
- continue;
- IMLRegID regId = it.first;
- PPCRecRA_convertToMappedRanges(ctx, imlSegment, regId, ctx.raParam->regIdToName.find(regId)->second);
- }
- // fill created ranges with read/write location indices
- // note that at this point there is only one range per register per segment
- // and the algorithm below relies on this
- size_t index = 0;
- IMLUsedRegisters gprTracking;
- while (index < imlSegment->imlList.size())
- {
- imlSegment->imlList[index].CheckRegisterUsage(&gprTracking);
- raInstructionEdge pos((sint32)index, true);
- gprTracking.ForEachReadGPR([&](IMLReg gprReg) {
- IMLRegID gprId = gprReg.GetRegID();
- raLivenessRange* subrange = regToSubrange.find(gprId)->second;
- IMLRA_UpdateOrAddSubrangeLocation(subrange, pos);
- });
- pos = {(sint32)index, false};
- gprTracking.ForEachWrittenGPR([&](IMLReg gprReg) {
- IMLRegID gprId = gprReg.GetRegID();
- raLivenessRange* subrange = regToSubrange.find(gprId)->second;
- IMLRA_UpdateOrAddSubrangeLocation(subrange, pos);
- });
- // check fixed register requirements
- IMLFixedRegisters fixedRegs;
- GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs);
- for (auto& fixedRegAccess : fixedRegs.listInput)
- {
- if (fixedRegAccess.reg != IMLREG_INVALID)
- AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, true, fixedRegAccess.physRegSet);
- }
- for (auto& fixedRegAccess : fixedRegs.listOutput)
- {
- if (fixedRegAccess.reg != IMLREG_INVALID)
- AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, false, fixedRegAccess.physRegSet);
- }
- index++;
- }
-}
-
-// Makes the abstract liveness range of regId in imlSegment reach the end of the segment.
-// An existing range only has its usageEnd moved to RA_INTER_RANGE_END. If the segment has no
-// range for regId yet, a new one is created that starts at the suffix instruction (when the
-// segment has one) or at RA_INTER_RANGE_END otherwise.
-void IMLRA_extendAbstractRangeToEndOfSegment(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId)
-{
-    auto& abstractRanges = ctx.GetSegmentAbstractRangeMap(imlSegment);
-    auto existing = abstractRanges.find(regId);
-    if (existing != abstractRanges.end())
-    {
-        // range already known for this segment, only the end needs to move
-        existing->second.usageEnd = RA_INTER_RANGE_END;
-        return;
-    }
-    // no range yet -> create one which covers only the tail of the segment
-    sint32 firstUsage = RA_INTER_RANGE_END;
-    if (imlSegment->HasSuffixInstruction())
-        firstUsage = imlSegment->GetSuffixInstructionIndex();
-    abstractRanges.try_emplace((IMLRegID)regId, IMLRegFormat::INVALID_FORMAT, firstUsage, RA_INTER_RANGE_END);
-}
-
-// Makes the abstract liveness range of regId in imlSegment start at the segment entry
-// (RA_INTER_RANGE_START), creating the range if it does not exist. Afterwards the range of
-// every predecessor segment is extended to its end so that the value flows in from all of them.
-void IMLRA_extendAbstractRangeToBeginningOfSegment(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId)
-{
-    auto& abstractRanges = ctx.GetSegmentAbstractRangeMap(imlSegment);
-    auto existing = abstractRanges.find(regId);
-    if (existing != abstractRanges.end())
-        existing->second.usageStart = RA_INTER_RANGE_START;
-    else
-        abstractRanges.try_emplace((IMLRegID)regId, IMLRegFormat::INVALID_FORMAT, RA_INTER_RANGE_START, RA_INTER_RANGE_START);
-    // the value now enters from the predecessors, so each of them has to keep it alive until its end
-    for (auto& predecessor : imlSegment->list_prevSegments)
-        IMLRA_extendAbstractRangeToEndOfSegment(ctx, predecessor, regId);
-}
-
-// Connects the abstract ranges of regId along a path of segments.
-// route[0] is extended to its end, route[routeDepth - 1] to its beginning, and every segment
-// in between is extended in both directions. routeDepth is expected to be at least 2.
-void IMLRA_connectAbstractRanges(IMLRegisterAllocatorContext& ctx, IMLRegID regId, IMLSegment** route, sint32 routeDepth)
-{
-#ifdef CEMU_DEBUG_ASSERT
-    if (routeDepth < 2)
-        assert_dbg();
-#endif
-    const sint32 lastHop = routeDepth - 1;
-    // first segment: keep the register alive until the segment ends
-    IMLRA_extendAbstractRangeToEndOfSegment(ctx, route[0], regId);
-    // intermediate segments: the register is live across the whole segment
-    for (sint32 hop = 1; hop < lastHop; ++hop)
-    {
-        IMLSegment* passThroughSegment = route[hop];
-        IMLRA_extendAbstractRangeToEndOfSegment(ctx, passThroughSegment, regId);
-        IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, passThroughSegment, regId);
-    }
-    // last segment: the register is live from the segment entry
-    IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, route[lastHop], regId);
-}
-
-// Recursive helper of PPCRecRA_checkAndTryExtendRange.
-// Records currentSegment in route[routeDepth] and looks for an abstract range of regID in it.
-// - No range in this segment: the segment length is subtracted from distanceLeft and, while
-//   budget remains, the search continues into both successors.
-// - Range found and close enough: all segments on the route are connected via
-//   IMLRA_connectAbstractRanges.
-// The route buffer holds at most 64 entries; deeper searches are aborted with a log message.
-void _IMLRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegment* currentSegment, IMLRegID regID, sint32 distanceLeft, IMLSegment** route, sint32 routeDepth)
-{
-    if (routeDepth >= 64)
-    {
-        cemuLog_logDebug(LogType::Force, "Recompiler RA route maximum depth exceeded\n");
-        return;
-    }
-    route[routeDepth] = currentSegment;
-
-    IMLRARegAbstractLiveness* range = _GetAbstractRange(ctx, currentSegment, regID);
-    if (range)
-    {
-        // measure distance to the range
-        if (range->usageStart == RA_INTER_RANGE_END)
-        {
-            // range only begins at the very end, the whole segment has to be crossed
-            if (distanceLeft < (sint32)currentSegment->imlList.size())
-                return; // range too far away
-        }
-        else if (range->usageStart != RA_INTER_RANGE_START && range->usageStart > distanceLeft)
-        {
-            return; // out of range
-        }
-        // found close range -> connect ranges
-        IMLRA_connectAbstractRanges(ctx, regID, route, routeDepth + 1);
-        return;
-    }
-
-    // register is not used here, the entire segment counts towards the distance
-    distanceLeft -= (sint32)currentSegment->imlList.size();
-    if (distanceLeft <= 0)
-        return;
-    if (currentSegment->nextSegmentBranchNotTaken)
-        _IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchNotTaken, regID, distanceLeft, route, routeDepth + 1);
-    if (currentSegment->nextSegmentBranchTaken)
-        _IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchTaken, regID, distanceLeft, route, routeDepth + 1);
-}
-
-// Tries to connect the given abstract range of regID with nearby ranges of the same register
-// in successor segments. The search budget is 45 instructions, measured from the end of the
-// range; nothing happens when the end of currentSegment is already out of reach.
-void PPCRecRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegment* currentSegment, IMLRARegAbstractLiveness* range, IMLRegID regID)
-{
-    cemu_assert_debug(range->usageEnd >= 0);
-    // number of instructions between the end of the range and the end of the segment
-    sint32 instructionsUntilEndOfSeg = 0;
-    if (range->usageEnd != RA_INTER_RANGE_END)
-        instructionsUntilEndOfSeg = (sint32)currentSegment->imlList.size() - range->usageEnd;
-    cemu_assert_debug(instructionsUntilEndOfSeg >= 0);
-
-    const sint32 remainingScanDist = 45 - instructionsUntilEndOfSeg;
-    if (remainingScanDist <= 0)
-        return; // can't reach end
-
-    // path of visited segments, filled in by the recursive helper
-    IMLSegment* route[64];
-    route[0] = currentSegment;
-    if (currentSegment->nextSegmentBranchNotTaken)
-        _IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchNotTaken, regID, remainingScanDist, route, 1);
-    if (currentSegment->nextSegmentBranchTaken)
-        _IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchTaken, regID, remainingScanDist, route, 1);
-}
-
-// Runs PPCRecRA_checkAndTryExtendRange for every abstract range of the segment.
-// Debug builds additionally assert two segment invariants: an enterable segment has no
-// predecessors, and a segment with an uncertain successor has no linked successors.
-void PPCRecRA_mergeCloseRangesForSegmentV2(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
-{
-    for (auto& [regId, abstractRange] : ctx.GetSegmentAbstractRangeMap(imlSegment))
-        PPCRecRA_checkAndTryExtendRange(ctx, imlSegment, &abstractRange, regId);
-#ifdef CEMU_DEBUG_ASSERT
-    const bool hasPredecessor = !imlSegment->list_prevSegments.empty();
-    if (hasPredecessor && imlSegment->isEnterable)
-        assert_dbg();
-    const bool hasSuccessor = imlSegment->nextSegmentBranchNotTaken != nullptr || imlSegment->nextSegmentBranchTaken != nullptr;
-    if (hasSuccessor && imlSegment->nextSegmentIsUncertain)
-        assert_dbg();
-#endif
-}
-
-// Visits every segment reachable from imlSegment exactly once, in discovery order, and
-// runs PPCRecRA_mergeCloseRangesForSegmentV2 on each of them.
-// Relies on IMLSegment::momentaryIndex being a valid index into segmentList2.
-void PPCRecRA_followFlowAndExtendRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
-{
-    // worklist of segments; entries are appended while the list is being processed
-    std::vector<IMLSegment*> list_segments;
-    // visited flag per segment, indexed by momentaryIndex
-    std::vector<bool> list_processedSegment;
-    size_t segmentCount = ctx.deprGenContext->segmentList2.size();
-    list_segments.reserve(segmentCount + 1);
-    list_processedSegment.resize(segmentCount);
-
-    auto markSegProcessed = [&list_processedSegment](IMLSegment* seg) {
-        list_processedSegment[seg->momentaryIndex] = true;
-    };
-    auto isSegProcessed = [&list_processedSegment](IMLSegment* seg) -> bool {
-        return list_processedSegment[seg->momentaryIndex];
-    };
-    markSegProcessed(imlSegment);
-
-    // unsigned index avoids a signed/unsigned comparison against size()
-    size_t index = 0;
-    list_segments.push_back(imlSegment);
-    while (index < list_segments.size())
-    {
-        IMLSegment* currentSegment = list_segments[index];
-        PPCRecRA_mergeCloseRangesForSegmentV2(ctx, currentSegment);
-        // follow flow, a segment is marked when queued so it is never queued twice
-        if (currentSegment->nextSegmentBranchNotTaken && !isSegProcessed(currentSegment->nextSegmentBranchNotTaken))
-        {
-            markSegProcessed(currentSegment->nextSegmentBranchNotTaken);
-            list_segments.push_back(currentSegment->nextSegmentBranchNotTaken);
-        }
-        if (currentSegment->nextSegmentBranchTaken && !isSegProcessed(currentSegment->nextSegmentBranchTaken))
-        {
-            markSegProcessed(currentSegment->nextSegmentBranchTaken);
-            list_segments.push_back(currentSegment->nextSegmentBranchTaken);
-        }
-        index++;
-    }
-}
-
-// Starts the range merging pass from every segment that has no predecessor
-// (entry points and standalone segments).
-void IMLRA_MergeCloseAbstractRanges(IMLRegisterAllocatorContext& ctx)
-{
-    for (IMLSegment* imlSegment : ctx.deprGenContext->segmentList2)
-    {
-        const bool isEntryOrStandalone = imlSegment->list_prevSegments.empty();
-        if (isEntryOrStandalone)
-            PPCRecRA_followFlowAndExtendRanges(ctx, imlSegment);
-    }
-}
-
-// For every segment inside a loop that has at least one successor with a lower loop depth,
-// all abstract ranges which reach the end of the segment are extended into the beginning of
-// every successor. This allows the data flow analyzer to move stores out of the loop.
-void IMLRA_ExtendAbstractRangesOutOfLoops(IMLRegisterAllocatorContext& ctx)
-{
-    for (IMLSegment* imlSegment : ctx.deprGenContext->segmentList2)
-    {
-        const auto localLoopDepth = imlSegment->loopDepth;
-        if (localLoopDepth <= 0)
-            continue; // not inside a loop
-        // a successor with a lower loop depth is a loop exit
-        const bool exitsViaTaken = imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->loopDepth < localLoopDepth;
-        const bool exitsViaNotTaken = imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->loopDepth < localLoopDepth;
-        if (!exitsViaTaken && !exitsViaNotTaken)
-            continue;
-
-        // note that the ranges are extended into both successors, not only into the exit
-        for (auto& rangeEntry : ctx.GetSegmentAbstractRangeMap(imlSegment))
-        {
-            const bool reachesSegmentEnd = rangeEntry.second.usageEnd == RA_INTER_RANGE_END;
-            if (!reachesSegmentEnd)
-                continue;
-            if (imlSegment->nextSegmentBranchTaken)
-                IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, imlSegment->nextSegmentBranchTaken, rangeEntry.first);
-            if (imlSegment->nextSegmentBranchNotTaken)
-                IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, imlSegment->nextSegmentBranchNotTaken, rangeEntry.first);
-        }
-    }
-}
-
-// Finalizes the abstract ranges across segment boundaries and then converts them into
-// concrete liveness ranges, segment by segment.
-void IMLRA_ProcessFlowAndCalculateLivenessRanges(IMLRegisterAllocatorContext& ctx)
-{
-    // step 1: connect ranges of the same register which are close to each other
-    IMLRA_MergeCloseAbstractRanges(ctx);
-    // step 2: extra pass to move register loads and stores out of loops
-    IMLRA_ExtendAbstractRangesOutOfLoops(ctx);
-    // step 3: calculate liveness ranges
-    auto& allSegments = ctx.deprGenContext->segmentList2;
-    for (auto segIt = allSegments.begin(); segIt != allSegments.end(); ++segIt)
-        IMLRA_ConvertAbstractToLivenessRanges(ctx, *segIt);
-}
-
-// Derives the load/store flags of a subrange from its access locations.
-// hasStore is set when the register is written anywhere in the subrange.
-// _noLoad is set when a write is seen before any read, or when the subrange continues
-// from the previous segment.
-void IMLRA_AnalyzeSubrangeDataDependency(raLivenessRange* subrange)
-{
-    bool sawRead = false;
-    bool sawWrite = false;
-    bool writtenBeforeAnyRead = false;
-    for (auto& location : subrange->list_accessLocations)
-    {
-        // the read is evaluated first, a location which reads and writes counts as read-before-write
-        if (location.IsRead())
-            sawRead = true;
-        if (!location.IsWrite())
-            continue;
-        if (!sawRead)
-            writtenBeforeAnyRead = true;
-        sawWrite = true;
-    }
-    subrange->hasStore = sawWrite;
-
-    const bool continuesFromPreviousSegment = subrange->interval.ExtendsPreviousSegment();
-    subrange->_noLoad = writtenBeforeAnyRead || continuesFromPreviousSegment;
-}
-
-// Result accumulator for _findSubrangeWriteEndings.
-// The struct has no initializers; the caller sets subrangeCount and hasUndefinedEndings before use.
-struct subrangeEndingInfo_t
-{
- raLivenessRange* subrangeList[SUBRANGE_LIST_SIZE]; // collected ending subranges, the first subrangeCount entries are valid
- sint32 subrangeCount; // number of valid entries in subrangeList
-
- bool hasUndefinedEndings; // set when the traversal could not determine all endings (depth limit, full list or missing successor subrange)
-};
-
-// Follows a subrange through its successor subranges and collects, in info->subrangeList,
-// every subrange that does not extend into a next segment ("ending").
-// info->hasUndefinedEndings is set when the result is incomplete: recursion depth of 30
-// reached, the list is full, or a successor segment exists without a matching subrange.
-// Subranges already visited with the same iterationIndex, and subranges with
-// hasStoreDelayed set, are not traversed.
-void _findSubrangeWriteEndings(raLivenessRange* subrange, uint32 iterationIndex, sint32 depth, subrangeEndingInfo_t* info)
-{
-    if (depth >= 30)
-    {
-        info->hasUndefinedEndings = true;
-        return;
-    }
-    if (subrange->lastIterationIndex == iterationIndex)
-        return; // already processed
-    subrange->lastIterationIndex = iterationIndex;
-    if (subrange->hasStoreDelayed)
-        return; // no need to traverse this subrange
-
-    if (!subrange->interval.ExtendsIntoNextSegment())
-    {
-        // ending segment, remember it if there is still room in the list
-        if (info->subrangeCount < SUBRANGE_LIST_SIZE)
-        {
-            info->subrangeList[info->subrangeCount] = subrange;
-            info->subrangeCount++;
-        }
-        else
-        {
-            info->hasUndefinedEndings = true;
-        }
-        return;
-    }
-
-    // traverse next subranges in flow
-    auto followSuccessor = [&](IMLSegment* nextSegment, raLivenessRange* nextSubrange) {
-        if (!nextSegment)
-            return;
-        if (nextSubrange == nullptr)
-            info->hasUndefinedEndings = true; // flow continues but the range does not
-        else
-            _findSubrangeWriteEndings(nextSubrange, iterationIndex, depth + 1, info);
-    };
-    IMLSegment* imlSegment = subrange->imlSegment;
-    followSuccessor(imlSegment->nextSegmentBranchNotTaken, subrange->subrangeBranchNotTaken);
-    followSuccessor(imlSegment->nextSegmentBranchTaken, subrange->subrangeBranchTaken);
-}
-
-// Decides for a single subrange whether its store can be delayed into the subranges where
-// the register's liveness ends. Only subranges which extend into a next segment and which
-// have hasStore set are considered.
-static void IMLRA_AnalyzeRangeDataFlow(raLivenessRange* subrange)
-{
- if (!subrange->interval.ExtendsIntoNextSegment())
- return;
- // analyze data flow across segments (if this segment has writes)
- if (subrange->hasStore)
- {
- subrangeEndingInfo_t writeEndingInfo;
- writeEndingInfo.subrangeCount = 0;
- writeEndingInfo.hasUndefinedEndings = false;
- _findSubrangeWriteEndings(subrange, IMLRA_GetNextIterationIndex(), 0, &writeEndingInfo);
- // the store is only moved when every ending is known
- if (writeEndingInfo.hasUndefinedEndings == false)
- {
- // get cost of delaying store into endings
- // (the cost is the maximum over all endings which do not store yet)
- sint32 delayStoreCost = 0;
- bool alreadyStoredInAllEndings = true;
- for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++)
- {
- raLivenessRange* subrangeItr = writeEndingInfo.subrangeList[i];
- if (subrangeItr->hasStore)
- continue; // this ending already stores, no extra cost
- alreadyStoredInAllEndings = false;
- sint32 storeCost = IMLRA_GetSegmentReadWriteCost(subrangeItr->imlSegment);
- delayStoreCost = std::max(storeCost, delayStoreCost);
- }
- if (alreadyStoredInAllEndings)
- {
- // every ending stores anyway, so the store in this subrange is redundant
- subrange->hasStore = false;
- subrange->hasStoreDelayed = true;
- }
- else if (delayStoreCost <= IMLRA_GetSegmentReadWriteCost(subrange->imlSegment))
- {
- // storing in the endings is not more expensive than storing here -> move the store
- subrange->hasStore = false;
- subrange->hasStoreDelayed = true;
- for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++)
- {
- raLivenessRange* subrangeItr = writeEndingInfo.subrangeList[i];
- subrangeItr->hasStore = true;
- }
- }
- }
- }
-}
-
-// Computes the load/store flags of all subranges in two passes over all segments:
-// first per subrange (IMLRA_AnalyzeSubrangeDataDependency), then across segment boundaries
-// (the per-subrange overload of IMLRA_AnalyzeRangeDataFlow).
-void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext)
-{
-    // this function is called after _AssignRegisters(), which means that all liveness ranges are already final and must not be modified anymore
-    // pass 1: track read/write dependencies per segment
-    for (auto& seg : ppcImlGenContext->segmentList2)
-    {
-        for (raLivenessRange* subrange = seg->raInfo.linkedList_allSubranges; subrange; subrange = subrange->link_allSegmentRanges.next)
-            IMLRA_AnalyzeSubrangeDataDependency(subrange);
-    }
-    // pass 2: propagate information across segment boundaries
-    for (auto& seg : ppcImlGenContext->segmentList2)
-    {
-        for (raLivenessRange* subrange = seg->raInfo.linkedList_allSubranges; subrange; subrange = subrange->link_allSegmentRanges.next)
-            IMLRA_AnalyzeRangeDataFlow(subrange);
-    }
-}
-
-/* Generate move instructions */
-
-// Builds an IMLReg for register id regId, passing baseFormat for both format arguments
-// of the IMLReg constructor and 0 for the third argument.
-inline IMLReg _MakeNativeReg(IMLRegFormat baseFormat, IMLRegID regId)
-{
- return IMLReg(baseFormat, baseFormat, 0, regId);
-}
-
-// prepass for IMLRA_GenerateSegmentMoveInstructions which updates all virtual registers to their physical counterparts
-// Walks the instructions of the segment in order while maintaining the set of ranges which are
-// live at the current instruction. The walk over linkedList_allSubranges only ever moves forward,
-// which presumably relies on the list being sorted by interval start - TODO confirm.
-void IMLRA_RewriteRegisters(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
-{
- // NOTE(review): the template arguments of the two containers below are missing in this copy
- // of the source (looks like an extraction artifact) - restore them from the original file
- std::unordered_map virtId2PhysReg; // virtual register id -> physical register of the currently active ranges
- boost::container::small_vector activeRanges; // ranges which are live at the current instruction
- raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges;
- raInstructionEdge currentEdge;
- for (size_t i = 0; i < imlSegment->imlList.size(); i++)
- {
- currentEdge.Set(i, false); // set to instruction index on output edge
- // activate ranges which begin before or during this instruction
- while (currentRange && currentRange->interval.start <= currentEdge)
- {
- cemu_assert_debug(virtId2PhysReg.find(currentRange->GetVirtualRegister()) == virtId2PhysReg.end() || virtId2PhysReg[currentRange->GetVirtualRegister()] == currentRange->GetPhysicalRegister()); // check for register conflict
-
- virtId2PhysReg[currentRange->GetVirtualRegister()] = currentRange->GetPhysicalRegister();
- activeRanges.push_back(currentRange);
- currentRange = currentRange->link_allSegmentRanges.next;
- }
- // rewrite registers
- imlSegment->imlList[i].RewriteGPR(virtId2PhysReg);
- // deactivate ranges which end during this instruction
- auto it = activeRanges.begin();
- while (it != activeRanges.end())
- {
- if ((*it)->interval.end <= currentEdge)
- {
- // the mapping is dropped together with the range
- virtId2PhysReg.erase((*it)->GetVirtualRegister());
- it = activeRanges.erase(it);
- }
- else
- ++it;
- }
- }
-}
-
-// Rebuilds the instruction list of a segment after register assignment.
-// First all virtual registers are rewritten to physical registers (IMLRA_RewriteRegisters).
-// Then the instruction list is rebuilt with name->register loads (make_r_name) inserted where a
-// range without _noLoad begins and register->name stores (make_name_r) inserted where a range
-// with hasStore ends. Empty segments are handled by a separate path at the top.
-void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
-{
- IMLRA_RewriteRegisters(ctx, imlSegment);
-
-#if DEBUG_RA_INSTRUCTION_GEN
- cemuLog_log(LogType::Force, "");
- cemuLog_log(LogType::Force, "[Seg before RA]");
- IMLDebug_DumpSegment(nullptr, imlSegment, true);
-#endif
-
- bool hadSuffixInstruction = imlSegment->HasSuffixInstruction();
-
- // NOTE(review): the element type of this vector is missing in this copy of the source
- // (looks like an extraction artifact) - restore it from the original file
- std::vector rebuiltInstructions;
- sint32 numInstructionsWithoutSuffix = (sint32)imlSegment->imlList.size() - (imlSegment->HasSuffixInstruction() ? 1 : 0);
-
- if (imlSegment->imlList.empty())
- {
- // empty segments need special handling (todo - look into merging this with the core logic below eventually)
- // store all ranges
- raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges;
- while (currentRange)
- {
- if (currentRange->hasStore)
- rebuiltInstructions.emplace_back().make_name_r(currentRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()));
- currentRange = currentRange->link_allSegmentRanges.next;
- }
- // load ranges
- currentRange = imlSegment->raInfo.linkedList_allSubranges;
- while (currentRange)
- {
- if (!currentRange->_noLoad)
- {
- cemu_assert_debug(currentRange->interval.ExtendsIntoNextSegment());
- rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName());
- }
- currentRange = currentRange->link_allSegmentRanges.next;
- }
- imlSegment->imlList = std::move(rebuiltInstructions);
- return;
- }
-
- // make sure that no range exceeds the suffix instruction input edge except if they need to be loaded for the next segment (todo - for those, set the start point accordingly?)
- {
- raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges;
- raInstructionEdge edge;
- // with a suffix instruction the limit is its input edge, otherwise the output edge of the last instruction
- if (imlSegment->HasSuffixInstruction())
- edge.Set(numInstructionsWithoutSuffix, true);
- else
- edge.Set(numInstructionsWithoutSuffix - 1, false);
-
- while (currentRange)
- {
- if (!currentRange->interval.IsNextSegmentOnly() && currentRange->interval.end > edge)
- {
- currentRange->interval.SetEnd(edge);
- }
- currentRange = currentRange->link_allSegmentRanges.next;
- }
- }
-
-#if DEBUG_RA_INSTRUCTION_GEN
- cemuLog_log(LogType::Force, "");
- cemuLog_log(LogType::Force, "--- Intermediate liveness info ---");
- {
- // NOTE(review): this debug-only code reads a member named interval2 while the rest of the
- // function uses interval - possibly stale after a rename, verify that it still compiles
- raLivenessRange* dbgRange = imlSegment->raInfo.linkedList_allSubranges;
- while (dbgRange)
- {
- cemuLog_log(LogType::Force, "Range i{}: {}-{}", dbgRange->GetVirtualRegister(), dbgRange->interval2.start.GetDebugString(), dbgRange->interval2.end.GetDebugString());
- dbgRange = dbgRange->link_allSegmentRanges.next;
- }
- }
-#endif
-
- // NOTE(review): template arguments missing here as well (extraction artifact)
- boost::container::small_vector activeRanges;
- // first we add all the ranges that extend from the previous segment, some of these will end immediately at the first instruction so we might need to store them early
- raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges;
- // make all ranges active that start on RA_INTER_RANGE_START
- while (currentRange && currentRange->interval.start.ConnectsToPreviousSegment())
- {
- activeRanges.push_back(currentRange);
- currentRange = currentRange->link_allSegmentRanges.next;
- }
- // store all ranges that end before the first output edge (includes RA_INTER_RANGE_START)
- auto it = activeRanges.begin();
- raInstructionEdge firstOutputEdge;
- firstOutputEdge.Set(0, false);
- while (it != activeRanges.end())
- {
- if ((*it)->interval.end < firstOutputEdge)
- {
- raLivenessRange* storedRange = *it;
- if (storedRange->hasStore)
- rebuiltInstructions.emplace_back().make_name_r(storedRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[storedRange->GetVirtualRegister()], storedRange->GetPhysicalRegister()));
- it = activeRanges.erase(it);
- continue;
- }
- ++it;
- }
-
- // main pass: for each instruction emit the loads of starting ranges, the instruction itself
- // and then the stores of ranges which end here
- sint32 numInstructions = (sint32)imlSegment->imlList.size();
- for (sint32 i = 0; i < numInstructions; i++)
- {
- raInstructionEdge curEdge;
- // input edge
- curEdge.SetRaw(i * 2 + 1); // +1 to include ranges that start at the output of the instruction
- while (currentRange && currentRange->interval.start <= curEdge)
- {
- if (!currentRange->_noLoad)
- {
- rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName());
- }
- activeRanges.push_back(currentRange);
- currentRange = currentRange->link_allSegmentRanges.next;
- }
- // copy instruction
- rebuiltInstructions.push_back(imlSegment->imlList[i]);
- // output edge
- curEdge.SetRaw(i * 2 + 1 + 1);
- // also store ranges that end on the next input edge, we handle this by adding an extra 1 above
- auto it = activeRanges.begin();
- while (it != activeRanges.end())
- {
- if ((*it)->interval.end <= curEdge)
- {
- // range expires
- // todo - check hasStore
- raLivenessRange* storedRange = *it;
- if (storedRange->hasStore)
- {
- cemu_assert_debug(i != numInstructionsWithoutSuffix); // not allowed to emit after suffix
- rebuiltInstructions.emplace_back().make_name_r(storedRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[storedRange->GetVirtualRegister()], storedRange->GetPhysicalRegister()));
- }
- it = activeRanges.erase(it);
- continue;
- }
- ++it;
- }
- }
- // if there is no suffix instruction we currently need to handle the final loads here
- cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction());
- if (imlSegment->HasSuffixInstruction())
- {
- // nothing may be emitted after the suffix instruction, so this path only reports and asserts
- if (currentRange)
- {
- cemuLog_logDebug(LogType::Force, "[DEBUG] GenerateSegmentMoveInstructions() hit suffix path with non-null currentRange. Segment: {:08x}", imlSegment->ppcAddress);
- }
- for (auto& remainingRange : activeRanges)
- {
- cemu_assert_debug(!remainingRange->hasStore);
- }
- }
- else
- {
- for (auto& remainingRange : activeRanges)
- {
- cemu_assert_debug(!remainingRange->hasStore); // this range still needs to be stored
- }
- // ranges which were not activated by the main pass are loaded at the end of the segment
- while (currentRange)
- {
- cemu_assert_debug(currentRange->interval.IsNextSegmentOnly());
- cemu_assert_debug(!currentRange->_noLoad);
- rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName());
- currentRange = currentRange->link_allSegmentRanges.next;
- }
- }
-
- imlSegment->imlList = std::move(rebuiltInstructions);
- cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction());
-
-#if DEBUG_RA_INSTRUCTION_GEN
- cemuLog_log(LogType::Force, "");
- cemuLog_log(LogType::Force, "[Seg after RA]");
- IMLDebug_DumpSegment(nullptr, imlSegment, false);
-#endif
-}
-
-// Inserts the load/store move instructions into every segment of the function.
-void IMLRA_GenerateMoveInstructions(IMLRegisterAllocatorContext& ctx)
-{
-    for (IMLSegment* imlSegment : ctx.deprGenContext->segmentList2)
-        IMLRA_GenerateSegmentMoveInstructions2(ctx, imlSegment);
-}
-
-// Debug validation (only compiled with DEBUG_RA_EXTRA_VALIDATION): checks every fixed register
-// requirement of the segment against all ranges which cover the position of the requirement.
-static void DbgVerifyFixedRegRequirements(IMLSegment* imlSegment)
-{
-#if DEBUG_RA_EXTRA_VALIDATION
- // NOTE(review): the element type of this vector is missing in this copy of the source
- // (looks like an extraction artifact) - restore it from the original file
- std::vector frr = IMLRA_BuildSegmentInstructionFixedRegList(imlSegment);
- for(auto& fixedReq : frr)
- {
- for (raLivenessRange* range = imlSegment->raInfo.linkedList_allSubranges; range; range = range->link_allSegmentRanges.next)
- {
- // NOTE(review): reads interval2 while the non-debug code in this file uses interval - possibly stale, verify
- if (!range->interval2.ContainsEdge(fixedReq.pos))
- continue;
- // verify if the requirement is compatible
- if(range->GetVirtualRegister() == fixedReq.regId)
- {
- cemu_assert(range->HasPhysicalRegister());
- cemu_assert(fixedReq.allowedReg.IsAvailable(range->GetPhysicalRegister())); // virtual register matches, but not assigned the right physical register
- }
- else
- {
- cemu_assert(!fixedReq.allowedReg.IsAvailable(range->GetPhysicalRegister())); // virtual register does not match, but using the reserved physical register
- }
- }
- }
-#endif
-}
-
-// Debug validation (only compiled with DEBUG_RA_EXTRA_VALIDATION): validates every subrange of
-// every segment and then checks the fixed register requirements of every segment.
-static void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx)
-{
-#if DEBUG_RA_EXTRA_VALIDATION
- for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++)
- {
- IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s];
- raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
- while (subrangeItr)
- {
- PPCRecRA_debugValidateSubrange(subrangeItr);
- subrangeItr = subrangeItr->link_allSegmentRanges.next;
- }
- }
- // check that no range violates register requirements
- for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++)
- {
- DbgVerifyFixedRegRequirements(ctx.deprGenContext->segmentList2[s]);
- }
-#endif
-}
-
-// Entry point of the register allocator. Runs all passes in order on the given function:
-// reshape -> liveness calculation -> flow processing -> register assignment ->
-// data flow analysis -> move instruction generation, and finally deletes all ranges.
-// raParam is stored by pointer in the context for the duration of the call.
-void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam)
-{
- IMLRegisterAllocatorContext ctx;
- ctx.raParam = &raParam;
- ctx.deprGenContext = ppcImlGenContext;
-
- IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext);
- ppcImlGenContext->UpdateSegmentIndices(); // update momentaryIndex of each segment
- // one abstract range map per segment
- ctx.perSegmentAbstractRanges.resize(ppcImlGenContext->segmentList2.size());
- IMLRA_CalculateLivenessRanges(ctx);
- IMLRA_ProcessFlowAndCalculateLivenessRanges(ctx);
- IMLRA_AssignRegisters(ctx, ppcImlGenContext);
- DbgVerifyAllRanges(ctx); // no-op unless DEBUG_RA_EXTRA_VALIDATION is enabled
- IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext);
- IMLRA_GenerateMoveInstructions(ctx);
-
- IMLRA_DeleteAllRanges(ppcImlGenContext);
-}
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h
deleted file mode 100644
index 0a54e4cb..00000000
--- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h
+++ /dev/null
@@ -1,125 +0,0 @@
-#pragma once
-
-// container for storing a set of register indices
-// specifically optimized towards storing typical range of physical register indices (expected to be below 64)
-// Bit N of the internal mask is set when register N is available. A default-constructed set is empty.
-class IMLPhysRegisterSet
-{
-public:
-    // marks register `index` as available. index must be below 64
-    void SetAvailable(uint32 index)
-    {
-        cemu_assert_debug(index < 64);
-        m_regBitmask |= ((uint64)1 << index);
-    }
-
-    // marks register `index` as not available. index must be below 64
-    void SetReserved(uint32 index)
-    {
-        cemu_assert_debug(index < 64);
-        m_regBitmask &= ~((uint64)1 << index);
-    }
-
-    // marks all 64 registers as available
-    void SetAllAvailable()
-    {
-        m_regBitmask = ~0ull;
-    }
-
-    // returns true if all 64 registers are available
-    bool HasAllAvailable() const
-    {
-        return m_regBitmask == ~0ull;
-    }
-
-    // returns true if register `index` is available
-    // indices outside of the representable range (>= 64) are never available
-    bool IsAvailable(uint32 index) const
-    {
-        if (index >= 64)
-            return false; // shifting a 64-bit value by 64 or more bits is undefined behavior
-        return (m_regBitmask & ((uint64)1 << index)) != 0;
-    }
-
-    // intersection: keeps only the registers which are available in both sets
-    IMLPhysRegisterSet& operator&=(const IMLPhysRegisterSet& other)
-    {
-        this->m_regBitmask &= other.m_regBitmask;
-        return *this;
-    }
-
-    IMLPhysRegisterSet& operator=(const IMLPhysRegisterSet& other)
-    {
-        this->m_regBitmask = other.m_regBitmask;
-        return *this;
-    }
-
-    // difference: makes every register unavailable which is available in `other`
-    void RemoveRegisters(const IMLPhysRegisterSet& other)
-    {
-        this->m_regBitmask &= ~other.m_regBitmask;
-    }
-
-    bool HasAnyAvailable() const
-    {
-        return m_regBitmask != 0;
-    }
-
-    // true if the mask is a non-zero power of two
-    bool HasExactlyOneAvailable() const
-    {
-        return m_regBitmask != 0 && (m_regBitmask & (m_regBitmask - 1)) == 0;
-    }
-
-    // returns index of first available register. Do not call when HasAnyAvailable() == false
-    // (std::countr_zero replaces the manual scan, which never terminated for an empty set in
-    // builds where the assert is compiled out; for an empty set this now returns 64)
-    IMLPhysReg GetFirstAvailableReg() const
-    {
-        cemu_assert_debug(m_regBitmask != 0);
-        return (IMLPhysReg)std::countr_zero(m_regBitmask);
-    }
-
-    // returns index of next available register (search includes any register index >= startIndex)
-    // returns -1 if there is no more register
-    IMLPhysReg GetNextAvailableReg(sint32 startIndex) const
-    {
-        if (startIndex >= 64)
-            return -1;
-        // every register index is >= a negative start index, so search from 0
-        // (also avoids an out-of-range shift below)
-        if (startIndex < 0)
-            startIndex = 0;
-        const uint64 remaining = m_regBitmask >> startIndex;
-        if (!remaining)
-            return -1;
-        return (IMLPhysReg)(startIndex + std::countr_zero(remaining));
-    }
-
-    // number of available registers
-    sint32 CountAvailableRegs() const
-    {
-        return std::popcount(m_regBitmask);
-    }
-
-private:
-    uint64 m_regBitmask{ 0 };
-};
-
-// Input parameters of IMLRegisterAllocator_AllocateRegisters.
-struct IMLRegisterAllocatorParameters
-{
-    // returns the pool of physical registers for the given register format
-    inline IMLPhysRegisterSet& GetPhysRegPool(IMLRegFormat regFormat)
-    {
-        return perTypePhysPool[stdx::to_underlying(regFormat)];
-    }
-
-    // one physical register pool per register format, indexed by the underlying enum value
-    IMLPhysRegisterSet perTypePhysPool[stdx::to_underlying(IMLRegFormat::TYPE_COUNT)];
-    // maps virtual register id to its name
-    // NOTE(review): the template arguments were missing in this copy of the source; they are
-    // restored from how the map is used by the allocator (looked up by register id, value
-    // passed on as the range name) - confirm against the original file
-    std::unordered_map<IMLRegID, IMLName> regIdToName;
-};
-
-void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam);
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp
deleted file mode 100644
index 583d5905..00000000
--- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp
+++ /dev/null
@@ -1,635 +0,0 @@
-#include "../PPCRecompiler.h"
-#include "../PPCRecompilerIml.h"
-#include "IMLRegisterAllocatorRanges.h"
-#include "util/helpers/MemoryPool.h"
-
-uint32 IMLRA_GetNextIterationIndex();
-
-// returns the virtual register id this range belongs to
-IMLRegID raLivenessRange::GetVirtualRegister() const
-{
- return virtualRegister;
-}
-
-// returns the assigned physical register (IMLRA_CreateRange initializes it to -1)
-sint32 raLivenessRange::GetPhysicalRegister() const
-{
- return physicalRegister;
-}
-
-// returns the name associated with the virtual register of this range
-IMLName raLivenessRange::GetName() const
-{
- return name;
-}
-
-// assigns a physical register to this range only; see SetPhysicalRegisterForCluster for the whole cluster
-void raLivenessRange::SetPhysicalRegister(IMLPhysReg physicalRegister)
-{
- this->physicalRegister = physicalRegister;
-}
-
-// assigns the physical register to this range and to every range connected to it
-void raLivenessRange::SetPhysicalRegisterForCluster(IMLPhysReg physicalRegister)
-{
-    for (raLivenessRange* clusterMember : GetAllSubrangesInCluster())
-        clusterMember->physicalRegister = physicalRegister;
-}
-
-// Returns this range together with every range reachable from it through successor
-// (subrangeBranchTaken / subrangeBranchNotTaken) and predecessor (previousRanges) links.
-// Visited ranges are tagged with a fresh iteration index so each one is collected once.
-// NOTE(review): the template arguments and the head of the loop were lost in this copy of the
-// source (extraction artifact). The loop is reconstructed from the surviving statements; the
-// inline capacity of 128 is an assumption - confirm both against the original file and the
-// declaration in the header.
-boost::container::small_vector<raLivenessRange*, 128> raLivenessRange::GetAllSubrangesInCluster()
-{
-    uint32 iterationIndex = IMLRA_GetNextIterationIndex();
-    boost::container::small_vector<raLivenessRange*, 128> subranges;
-    subranges.push_back(this);
-    this->lastIterationIndex = iterationIndex;
-    // the vector doubles as worklist: entries are appended while it is being iterated
-    size_t i = 0;
-    while (i < subranges.size())
-    {
-        raLivenessRange* cur = subranges[i];
-        i++;
-        // check successors
-        if (cur->subrangeBranchTaken && cur->subrangeBranchTaken->lastIterationIndex != iterationIndex)
-        {
-            cur->subrangeBranchTaken->lastIterationIndex = iterationIndex;
-            subranges.push_back(cur->subrangeBranchTaken);
-        }
-        if (cur->subrangeBranchNotTaken && cur->subrangeBranchNotTaken->lastIterationIndex != iterationIndex)
-        {
-            cur->subrangeBranchNotTaken->lastIterationIndex = iterationIndex;
-            subranges.push_back(cur->subrangeBranchNotTaken);
-        }
-        // check predecessors
-        for (auto& prev : cur->previousRanges)
-        {
-            if (prev->lastIterationIndex != iterationIndex)
-            {
-                prev->lastIterationIndex = iterationIndex;
-                subranges.push_back(prev);
-            }
-        }
-    }
-    return subranges;
-}
-
-// Intersects allowedRegs with the fixed register requirements of `range` and, recursively,
-// of every range connected to it which was not yet visited with this iterationIndex.
-void raLivenessRange::GetAllowedRegistersExRecursive(raLivenessRange* range, uint32 iterationIndex, IMLPhysRegisterSet& allowedRegs)
-{
-    range->lastIterationIndex = iterationIndex;
-    for (auto& requirement : range->list_fixedRegRequirements)
-        allowedRegs &= requirement.allowedReg;
-
-    // recurse into a connected range unless it already carries the current iteration index
-    auto visitIfNew = [&](raLivenessRange* connected) {
-        if (connected->lastIterationIndex != iterationIndex)
-            GetAllowedRegistersExRecursive(connected, iterationIndex, allowedRegs);
-    };
-    // check successors
-    if (range->subrangeBranchTaken)
-        visitIfNew(range->subrangeBranchTaken);
-    if (range->subrangeBranchNotTaken)
-        visitIfNew(range->subrangeBranchNotTaken);
-    // check predecessors
-    for (auto& prev : range->previousRanges)
-        visitIfNew(prev);
-}
-
-// Computes the registers allowed by the fixed register requirements of this range and of all
-// ranges connected to it. allowedRegisters is overwritten: it starts as "all available" and is
-// narrowed by each requirement. Returns true if the result is not the full set, i.e. at least
-// one requirement restricted it.
-bool raLivenessRange::GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters)
-{
- uint32 iterationIndex = IMLRA_GetNextIterationIndex();
- allowedRegisters.SetAllAvailable();
- GetAllowedRegistersExRecursive(this, iterationIndex, allowedRegisters);
- return !allowedRegisters.HasAllAvailable();
-}
-
-// Returns regPool narrowed by the fixed register requirements which apply to this range.
-// A range which connects to a previous or next segment takes the requirements of its whole
-// cluster into account, otherwise only its own requirements are applied.
-IMLPhysRegisterSet raLivenessRange::GetAllowedRegisters(IMLPhysRegisterSet regPool)
-{
-    IMLPhysRegisterSet allowed = regPool;
-    auto applyRequirementsOf = [&allowed](raLivenessRange* source) {
-        for (auto& fixedRegLoc : source->list_fixedRegRequirements)
-            allowed &= fixedRegLoc.allowedReg;
-    };
-    const bool isSegmentLocal = !interval.ExtendsPreviousSegment() && !interval.ExtendsIntoNextSegment();
-    if (isSegmentLocal)
-    {
-        applyRequirementsOf(this);
-        return allowed;
-    }
-    for (raLivenessRange* clusterMember : GetAllSubrangesInCluster())
-        applyRequirementsOf(clusterMember);
-    return allowed;
-}
-
-// Inserts subrange at the front of the per-virtual-register linked list. `root` maps each
-// virtual register id to the head of its list.
-// NOTE(review): the template arguments of `root` were missing in this copy of the source; they
-// are restored from how the map is used in this function - confirm against the declaration of
-// raInfo.linkedList_perVirtualRegister.
-void PPCRecRARange_addLink_perVirtualGPR(std::unordered_map<IMLRegID, raLivenessRange*>& root, raLivenessRange* subrange)
-{
-    IMLRegID regId = subrange->GetVirtualRegister();
-    auto it = root.find(regId);
-    if (it == root.end())
-    {
-        // new single element
-        root.try_emplace(regId, subrange);
-        subrange->link_sameVirtualRegister.prev = nullptr;
-        subrange->link_sameVirtualRegister.next = nullptr;
-    }
-    else
-    {
-        // insert in first position
-        raLivenessRange* priorFirst = it->second;
-        subrange->link_sameVirtualRegister.next = priorFirst;
-        it->second = subrange;
-        subrange->link_sameVirtualRegister.prev = nullptr;
-        priorFirst->link_sameVirtualRegister.prev = subrange;
-    }
-}
-
-// Inserts subrange at the front of the doubly linked list of all ranges of a segment.
-// *root is the current head (may be null) and is updated to point to subrange.
-void PPCRecRARange_addLink_allSegmentRanges(raLivenessRange** root, raLivenessRange* subrange)
-{
-    raLivenessRange* previousHead = *root;
-    subrange->link_allSegmentRanges.next = previousHead;
-    if (previousHead)
-        previousHead->link_allSegmentRanges.prev = subrange;
-    subrange->link_allSegmentRanges.prev = nullptr;
-    *root = subrange;
-}
-
-// Removes subrange from the per-virtual-register linked list. If subrange was the head, the map
-// entry is redirected to the next element, or erased when the list becomes empty.
-// NOTE(review): the template arguments of `root` were missing in this copy of the source; they
-// are restored from how the map is used in this function - confirm against the declaration of
-// raInfo.linkedList_perVirtualRegister.
-void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_map<IMLRegID, raLivenessRange*>& root, raLivenessRange* subrange)
-{
-#ifdef CEMU_DEBUG_ASSERT
-    // verify that subrange is actually part of the list
-    raLivenessRange* cur = root.find(subrange->GetVirtualRegister())->second;
-    bool hasRangeFound = false;
-    while(cur)
-    {
-        if(cur == subrange)
-        {
-            hasRangeFound = true;
-            break;
-        }
-        cur = cur->link_sameVirtualRegister.next;
-    }
-    cemu_assert_debug(hasRangeFound);
-#endif
-    IMLRegID regId = subrange->GetVirtualRegister();
-    raLivenessRange* nextRange = subrange->link_sameVirtualRegister.next;
-    raLivenessRange* prevRange = subrange->link_sameVirtualRegister.prev;
-    // unlink from both neighbours
-    if (prevRange)
-        prevRange->link_sameVirtualRegister.next = subrange->link_sameVirtualRegister.next;
-    if (nextRange)
-        nextRange->link_sameVirtualRegister.prev = subrange->link_sameVirtualRegister.prev;
-
-    if (!prevRange)
-    {
-        // subrange was the head of the list, so the map entry has to change
-        if (nextRange)
-        {
-            root.find(regId)->second = nextRange;
-        }
-        else
-        {
-            cemu_assert_debug(root.find(regId)->second == subrange);
-            root.erase(regId);
-        }
-    }
-#ifdef CEMU_DEBUG_ASSERT
-    // poison the links so that accidental use of an unlinked range is easy to spot
-    subrange->link_sameVirtualRegister.prev = (raLivenessRange*)1;
-    subrange->link_sameVirtualRegister.next = (raLivenessRange*)1;
-#endif
-}
-
-// Removes subrange from the doubly linked list of all ranges of a segment.
-// *root is updated when subrange was the head of the list.
-void PPCRecRARange_removeLink_allSegmentRanges(raLivenessRange** root, raLivenessRange* subrange)
-{
-    raLivenessRange* const before = subrange->link_allSegmentRanges.prev;
-    raLivenessRange* const after = subrange->link_allSegmentRanges.next;
-    if (before)
-        before->link_allSegmentRanges.next = after;
-    else
-        (*root) = after; // subrange was the head
-    if (after)
-        after->link_allSegmentRanges.prev = before;
-#ifdef CEMU_DEBUG_ASSERT
-    // poison the links of the removed range
-    subrange->link_allSegmentRanges.prev = (raLivenessRange*)1;
-    subrange->link_allSegmentRanges.next = (raLivenessRange*)1;
-#endif
-}
-
-MemoryPoolPermanentObjects memPool_livenessSubrange(4096);
-
-// startPosition and endPosition are inclusive
-raLivenessRange* IMLRA_CreateRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition)
-{
- raLivenessRange* range = memPool_livenessSubrange.acquireObj();
- range->previousRanges.clear();
- range->list_accessLocations.clear();
- range->list_fixedRegRequirements.clear();
- range->imlSegment = imlSegment;
-
- cemu_assert_debug(startPosition <= endPosition);
- range->interval.start = startPosition;
- range->interval.end = endPosition;
-
- // register mapping
- range->virtualRegister = virtualRegister;
- range->name = name;
- range->physicalRegister = -1;
- // default values
- range->hasStore = false;
- range->hasStoreDelayed = false;
- range->lastIterationIndex = 0;
- range->subrangeBranchNotTaken = nullptr;
- range->subrangeBranchTaken = nullptr;
- cemu_assert_debug(range->previousRanges.empty());
- range->_noLoad = false;
- // add to segment linked lists
- PPCRecRARange_addLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualRegister, range);
- PPCRecRARange_addLink_allSegmentRanges(&imlSegment->raInfo.linkedList_allSubranges, range);
- return range;
-}
-
-void _unlinkSubrange(raLivenessRange* range)
-{
- IMLSegment* imlSegment = range->imlSegment;
- PPCRecRARange_removeLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualRegister, range);
- PPCRecRARange_removeLink_allSegmentRanges(&imlSegment->raInfo.linkedList_allSubranges, range);
- // unlink reverse references
- if(range->subrangeBranchTaken)
- range->subrangeBranchTaken->previousRanges.erase(std::find(range->subrangeBranchTaken->previousRanges.begin(), range->subrangeBranchTaken->previousRanges.end(), range));
- if(range->subrangeBranchNotTaken)
- range->subrangeBranchNotTaken->previousRanges.erase(std::find(range->subrangeBranchNotTaken->previousRanges.begin(), range->subrangeBranchNotTaken->previousRanges.end(), range));
- range->subrangeBranchTaken = (raLivenessRange*)(uintptr_t)-1;
- range->subrangeBranchNotTaken = (raLivenessRange*)(uintptr_t)-1;
- // remove forward references
- for(auto& prev : range->previousRanges)
- {
- if(prev->subrangeBranchTaken == range)
- prev->subrangeBranchTaken = nullptr;
- if(prev->subrangeBranchNotTaken == range)
- prev->subrangeBranchNotTaken = nullptr;
- }
- range->previousRanges.clear();
-}
-
-void IMLRA_DeleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* range)
-{
- _unlinkSubrange(range);
- range->list_accessLocations.clear();
- range->list_fixedRegRequirements.clear();
- memPool_livenessSubrange.releaseObj(range);
-}
-
-void IMLRA_DeleteRangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* range)
-{
- auto clusterRanges = range->GetAllSubrangesInCluster();
- for (auto& subrange : clusterRanges)
- IMLRA_DeleteRange(ppcImlGenContext, subrange);
-}
-
-void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext)
-{
- for(auto& seg : ppcImlGenContext->segmentList2)
- {
- raLivenessRange* cur;
- while(cur = seg->raInfo.linkedList_allSubranges)
- IMLRA_DeleteRange(ppcImlGenContext, cur);
- seg->raInfo.linkedList_allSubranges = nullptr;
- seg->raInfo.linkedList_perVirtualRegister.clear();
- }
-}
-
-void IMLRA_MergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange)
-{
-#ifdef CEMU_DEBUG_ASSERT
- PPCRecRA_debugValidateSubrange(subrange);
- PPCRecRA_debugValidateSubrange(absorbedSubrange);
- if (subrange->imlSegment != absorbedSubrange->imlSegment)
- assert_dbg();
- cemu_assert_debug(subrange->interval.end == absorbedSubrange->interval.start);
-
- if (subrange->subrangeBranchTaken || subrange->subrangeBranchNotTaken)
- assert_dbg();
- if (subrange == absorbedSubrange)
- assert_dbg();
-#endif
- // update references
- subrange->subrangeBranchTaken = absorbedSubrange->subrangeBranchTaken;
- subrange->subrangeBranchNotTaken = absorbedSubrange->subrangeBranchNotTaken;
- absorbedSubrange->subrangeBranchTaken = nullptr;
- absorbedSubrange->subrangeBranchNotTaken = nullptr;
- if(subrange->subrangeBranchTaken)
- *std::find(subrange->subrangeBranchTaken->previousRanges.begin(), subrange->subrangeBranchTaken->previousRanges.end(), absorbedSubrange) = subrange;
- if(subrange->subrangeBranchNotTaken)
- *std::find(subrange->subrangeBranchNotTaken->previousRanges.begin(), subrange->subrangeBranchNotTaken->previousRanges.end(), absorbedSubrange) = subrange;
-
- // merge usage locations
- for (auto& accessLoc : absorbedSubrange->list_accessLocations)
- subrange->list_accessLocations.push_back(accessLoc);
- absorbedSubrange->list_accessLocations.clear();
- // merge fixed reg locations
-#ifdef CEMU_DEBUG_ASSERT
- if(!subrange->list_fixedRegRequirements.empty() && !absorbedSubrange->list_fixedRegRequirements.empty())
- {
- cemu_assert_debug(subrange->list_fixedRegRequirements.back().pos < absorbedSubrange->list_fixedRegRequirements.front().pos);
- }
-#endif
- for (auto& fixedReg : absorbedSubrange->list_fixedRegRequirements)
- subrange->list_fixedRegRequirements.push_back(fixedReg);
- absorbedSubrange->list_fixedRegRequirements.clear();
-
- subrange->interval.end = absorbedSubrange->interval.end;
-
- PPCRecRA_debugValidateSubrange(subrange);
-
- IMLRA_DeleteRange(ppcImlGenContext, absorbedSubrange);
-}
-
-// remove all inter-segment connections from the range cluster and split it into local ranges. Ranges are trimmed and if they have no access location they will be removed
-void IMLRA_ExplodeRangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange)
-{
- cemu_assert_debug(originRange->interval.ExtendsPreviousSegment() || originRange->interval.ExtendsIntoNextSegment()); // only call this on ranges that span multiple segments
- auto clusterRanges = originRange->GetAllSubrangesInCluster();
- for (auto& subrange : clusterRanges)
- {
- if (subrange->list_accessLocations.empty())
- continue;
- raInterval interval;
- interval.SetInterval(subrange->list_accessLocations.front().pos, subrange->list_accessLocations.back().pos);
- raLivenessRange* newSubrange = IMLRA_CreateRange(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), interval.start, interval.end);
- // copy locations and fixed reg indices
- newSubrange->list_accessLocations = subrange->list_accessLocations;
- newSubrange->list_fixedRegRequirements = subrange->list_fixedRegRequirements;
- if(originRange->HasPhysicalRegister())
- {
- cemu_assert_debug(subrange->list_fixedRegRequirements.empty()); // avoid unassigning a register from a range with a fixed register requirement
- }
- // validate
- if(!newSubrange->list_accessLocations.empty())
- {
- cemu_assert_debug(newSubrange->list_accessLocations.front().pos >= newSubrange->interval.start);
- cemu_assert_debug(newSubrange->list_accessLocations.back().pos <= newSubrange->interval.end);
- }
- if(!newSubrange->list_fixedRegRequirements.empty())
- {
- cemu_assert_debug(newSubrange->list_fixedRegRequirements.front().pos >= newSubrange->interval.start); // fixed register requirements outside of the actual access range probably means there is a mistake in GetInstructionFixedRegisters()
- cemu_assert_debug(newSubrange->list_fixedRegRequirements.back().pos <= newSubrange->interval.end);
- }
- }
- // delete the original range cluster
- IMLRA_DeleteRangeCluster(ppcImlGenContext, originRange);
-}
-
-#ifdef CEMU_DEBUG_ASSERT
-void PPCRecRA_debugValidateSubrange(raLivenessRange* range)
-{
- // validate subrange
- if (range->subrangeBranchTaken && range->subrangeBranchTaken->imlSegment != range->imlSegment->nextSegmentBranchTaken)
- assert_dbg();
- if (range->subrangeBranchNotTaken && range->subrangeBranchNotTaken->imlSegment != range->imlSegment->nextSegmentBranchNotTaken)
- assert_dbg();
-
- if(range->subrangeBranchTaken || range->subrangeBranchNotTaken)
- {
- cemu_assert_debug(range->interval.end.ConnectsToNextSegment());
- }
- if(!range->previousRanges.empty())
- {
- cemu_assert_debug(range->interval.start.ConnectsToPreviousSegment());
- }
- // validate locations
- if (!range->list_accessLocations.empty())
- {
- cemu_assert_debug(range->list_accessLocations.front().pos >= range->interval.start);
- cemu_assert_debug(range->list_accessLocations.back().pos <= range->interval.end);
- }
- // validate fixed reg requirements
- if (!range->list_fixedRegRequirements.empty())
- {
- cemu_assert_debug(range->list_fixedRegRequirements.front().pos >= range->interval.start);
- cemu_assert_debug(range->list_fixedRegRequirements.back().pos <= range->interval.end);
- for(sint32 i = 0; i < (sint32)range->list_fixedRegRequirements.size()-1; i++)
- cemu_assert_debug(range->list_fixedRegRequirements[i].pos < range->list_fixedRegRequirements[i+1].pos);
- }
-
-}
-#else
-void PPCRecRA_debugValidateSubrange(raLivenessRange* range) {}
-#endif
-
-// trim start and end of range to match first and last read/write locations
-// does not trim start/endpoints which extend into the next/previous segment
-void IMLRA_TrimRangeToUse(raLivenessRange* range)
-{
- if(range->list_accessLocations.empty())
- {
- // special case where we trim ranges extending from other segments to a single instruction edge
- cemu_assert_debug(!range->interval.start.IsInstructionIndex() || !range->interval.end.IsInstructionIndex());
- if(range->interval.start.IsInstructionIndex())
- range->interval.start = range->interval.end;
- if(range->interval.end.IsInstructionIndex())
- range->interval.end = range->interval.start;
- return;
- }
- // trim start and end
- raInterval prevInterval = range->interval;
- if(range->interval.start.IsInstructionIndex())
- range->interval.start = range->list_accessLocations.front().pos;
- if(range->interval.end.IsInstructionIndex())
- range->interval.end = range->list_accessLocations.back().pos;
- // extra checks
-#ifdef CEMU_DEBUG_ASSERT
- cemu_assert_debug(range->interval.start <= range->interval.end);
- for(auto& loc : range->list_accessLocations)
- {
- cemu_assert_debug(range->interval.ContainsEdge(loc.pos));
- }
- cemu_assert_debug(prevInterval.ContainsWholeInterval(range->interval));
-#endif
-}
-
-// split range at the given position
-// After the split there will be two ranges:
-// head -> subrange is shortened to end at splitIndex (exclusive)
-// tail -> a new subrange that ranges from splitIndex (inclusive) to the end of the original subrange
-// if head has a physical register assigned it will not carry over to tail
-// The return value is the tail range
-// If trimToUsage is true, the end of the head subrange and the start of the tail subrange will be shrunk to fit the read/write locations within. If there are no locations then the range will be deleted
-raLivenessRange* IMLRA_SplitRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToUsage)
-{
- cemu_assert_debug(splitPosition.IsInstructionIndex());
- cemu_assert_debug(!subrange->interval.IsNextSegmentOnly() && !subrange->interval.IsPreviousSegmentOnly());
- cemu_assert_debug(subrange->interval.ContainsEdge(splitPosition));
- // determine new intervals
- raInterval headInterval, tailInterval;
- headInterval.SetInterval(subrange->interval.start, splitPosition-1);
- tailInterval.SetInterval(splitPosition, subrange->interval.end);
- cemu_assert_debug(headInterval.start <= headInterval.end);
- cemu_assert_debug(tailInterval.start <= tailInterval.end);
- // create tail
- raLivenessRange* tailSubrange = IMLRA_CreateRange(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), tailInterval.start, tailInterval.end);
- tailSubrange->SetPhysicalRegister(subrange->GetPhysicalRegister());
- // carry over branch targets and update reverse references
- tailSubrange->subrangeBranchTaken = subrange->subrangeBranchTaken;
- tailSubrange->subrangeBranchNotTaken = subrange->subrangeBranchNotTaken;
- subrange->subrangeBranchTaken = nullptr;
- subrange->subrangeBranchNotTaken = nullptr;
- if(tailSubrange->subrangeBranchTaken)
- *std::find(tailSubrange->subrangeBranchTaken->previousRanges.begin(), tailSubrange->subrangeBranchTaken->previousRanges.end(), subrange) = tailSubrange;
- if(tailSubrange->subrangeBranchNotTaken)
- *std::find(tailSubrange->subrangeBranchNotTaken->previousRanges.begin(), tailSubrange->subrangeBranchNotTaken->previousRanges.end(), subrange) = tailSubrange;
- // we assume that list_locations is ordered by instruction index and contains no duplicate indices, so lets check that here just in case
-#ifdef CEMU_DEBUG_ASSERT
- if(subrange->list_accessLocations.size() > 1)
- {
- for(size_t i=0; ilist_accessLocations.size()-1; i++)
- {
- cemu_assert_debug(subrange->list_accessLocations[i].pos < subrange->list_accessLocations[i+1].pos);
- }
- }
-#endif
- // split locations
- auto it = std::lower_bound(
- subrange->list_accessLocations.begin(), subrange->list_accessLocations.end(), splitPosition,
- [](const raAccessLocation& accessLoc, raInstructionEdge value) { return accessLoc.pos < value; }
- );
- size_t originalCount = subrange->list_accessLocations.size();
- tailSubrange->list_accessLocations.insert(tailSubrange->list_accessLocations.end(), it, subrange->list_accessLocations.end());
- subrange->list_accessLocations.erase(it, subrange->list_accessLocations.end());
- cemu_assert_debug(subrange->list_accessLocations.empty() || subrange->list_accessLocations.back().pos < splitPosition);
- cemu_assert_debug(tailSubrange->list_accessLocations.empty() || tailSubrange->list_accessLocations.front().pos >= splitPosition);
- cemu_assert_debug(subrange->list_accessLocations.size() + tailSubrange->list_accessLocations.size() == originalCount);
- // split fixed reg requirements
- for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++)
- {
- raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i;
- if (tailInterval.ContainsEdge(fixedReg->pos))
- {
- tailSubrange->list_fixedRegRequirements.push_back(*fixedReg);
- }
- }
- // remove tail fixed reg requirements from head
- for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++)
- {
- raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i;
- if (!headInterval.ContainsEdge(fixedReg->pos))
- {
- subrange->list_fixedRegRequirements.resize(i);
- break;
- }
- }
- // adjust intervals
- subrange->interval = headInterval;
- tailSubrange->interval = tailInterval;
- // trim to hole
- if(trimToUsage)
- {
- if(subrange->list_accessLocations.empty() && (subrange->interval.start.IsInstructionIndex() && subrange->interval.end.IsInstructionIndex()))
- {
- IMLRA_DeleteRange(ppcImlGenContext, subrange);
- subrange = nullptr;
- }
- else
- {
- IMLRA_TrimRangeToUse(subrange);
- }
- if(tailSubrange->list_accessLocations.empty() && (tailSubrange->interval.start.IsInstructionIndex() && tailSubrange->interval.end.IsInstructionIndex()))
- {
- IMLRA_DeleteRange(ppcImlGenContext, tailSubrange);
- tailSubrange = nullptr;
- }
- else
- {
- IMLRA_TrimRangeToUse(tailSubrange);
- }
- }
- // validation
- cemu_assert_debug(!subrange || subrange->interval.start <= subrange->interval.end);
- cemu_assert_debug(!tailSubrange || tailSubrange->interval.start <= tailSubrange->interval.end);
- cemu_assert_debug(!tailSubrange || tailSubrange->interval.start >= splitPosition);
- if (!trimToUsage)
- cemu_assert_debug(!tailSubrange || tailSubrange->interval.start == splitPosition);
-
- if(subrange)
- PPCRecRA_debugValidateSubrange(subrange);
- if(tailSubrange)
- PPCRecRA_debugValidateSubrange(tailSubrange);
- return tailSubrange;
-}
-
-sint32 IMLRA_GetSegmentReadWriteCost(IMLSegment* imlSegment)
-{
- sint32 v = imlSegment->loopDepth + 1;
- v *= 5;
- return v*v; // 25, 100, 225, 400
-}
-
-// calculate additional cost of range that it would have after calling _ExplodeRange() on it
-sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange)
-{
- auto ranges = subrange->GetAllSubrangesInCluster();
- sint32 cost = 0;//-PPCRecRARange_estimateTotalCost(ranges);
- for (auto& subrange : ranges)
- {
- if (subrange->list_accessLocations.empty())
- continue; // this range would be deleted and thus has no cost
- sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment);
- bool hasAdditionalLoad = subrange->interval.ExtendsPreviousSegment();
- bool hasAdditionalStore = subrange->interval.ExtendsIntoNextSegment();
- if(hasAdditionalLoad && subrange->list_accessLocations.front().IsWrite()) // if written before read then a load isn't necessary
- {
- cemu_assert_debug(!subrange->list_accessLocations.front().IsRead());
- cost += segmentLoadStoreCost;
- }
- if(hasAdditionalStore)
- {
- bool hasWrite = std::find_if(subrange->list_accessLocations.begin(), subrange->list_accessLocations.end(), [](const raAccessLocation& loc) { return loc.IsWrite(); }) != subrange->list_accessLocations.end();
- if(!hasWrite) // ranges which don't modify their value do not need to be stored
- cost += segmentLoadStoreCost;
- }
- }
- // todo - properly calculating all the data-flow dependency based costs is more complex so this currently is an approximation
- return cost;
-}
-
-sint32 IMLRA_CalculateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition)
-{
- // validation
-#ifdef CEMU_DEBUG_ASSERT
- if (subrange->interval.ExtendsIntoNextSegment())
- assert_dbg();
-#endif
- cemu_assert_debug(splitPosition.IsInstructionIndex());
-
- sint32 cost = 0;
- // find split position in location list
- if (subrange->list_accessLocations.empty())
- return 0;
- if (splitPosition <= subrange->list_accessLocations.front().pos)
- return 0;
- if (splitPosition > subrange->list_accessLocations.back().pos)
- return 0;
-
- size_t firstTailLocationIndex = 0;
- for (size_t i = 0; i < subrange->list_accessLocations.size(); i++)
- {
- if (subrange->list_accessLocations[i].pos >= splitPosition)
- {
- firstTailLocationIndex = i;
- break;
- }
- }
- std::span headLocations{subrange->list_accessLocations.data(), firstTailLocationIndex};
- std::span tailLocations{subrange->list_accessLocations.data() + firstTailLocationIndex, subrange->list_accessLocations.size() - firstTailLocationIndex};
- cemu_assert_debug(headLocations.empty() || headLocations.back().pos < splitPosition);
- cemu_assert_debug(tailLocations.empty() || tailLocations.front().pos >= splitPosition);
-
- sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment);
-
- auto CalculateCostFromLocationRange = [segmentLoadStoreCost](std::span locations, bool trackLoadCost = true, bool trackStoreCost = true) -> sint32
- {
- if(locations.empty())
- return 0;
- sint32 cost = 0;
- if(locations.front().IsRead() && trackLoadCost)
- cost += segmentLoadStoreCost; // not overwritten, so there is a load cost
- bool hasWrite = std::find_if(locations.begin(), locations.end(), [](const raAccessLocation& loc) { return loc.IsWrite(); }) != locations.end();
- if(hasWrite && trackStoreCost)
- cost += segmentLoadStoreCost; // modified, so there is a store cost
- return cost;
- };
-
- sint32 baseCost = CalculateCostFromLocationRange(subrange->list_accessLocations);
-
- bool tailOverwritesValue = !tailLocations.empty() && !tailLocations.front().IsRead() && tailLocations.front().IsWrite();
-
- sint32 newCost = CalculateCostFromLocationRange(headLocations) + CalculateCostFromLocationRange(tailLocations, !tailOverwritesValue, true);
- cemu_assert_debug(newCost >= baseCost);
- cost = newCost - baseCost;
-
- return cost;
-}
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h
deleted file mode 100644
index b0685cc5..00000000
--- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h
+++ /dev/null
@@ -1,364 +0,0 @@
-#pragma once
-#include "IMLRegisterAllocator.h"
-
-struct raLivenessSubrangeLink
-{
- struct raLivenessRange* prev;
- struct raLivenessRange* next;
-};
-
-struct raInstructionEdge
-{
- friend struct raInterval;
-public:
- raInstructionEdge()
- {
- index = 0;
- }
-
- raInstructionEdge(sint32 instructionIndex, bool isInputEdge)
- {
- Set(instructionIndex, isInputEdge);
- }
-
- void Set(sint32 instructionIndex, bool isInputEdge)
- {
- if(instructionIndex == RA_INTER_RANGE_START || instructionIndex == RA_INTER_RANGE_END)
- {
- index = instructionIndex;
- return;
- }
- index = instructionIndex * 2 + (isInputEdge ? 0 : 1);
- cemu_assert_debug(index >= 0 && index < 0x100000*2); // make sure index value is sane
- }
-
- void SetRaw(sint32 index)
- {
- this->index = index;
- cemu_assert_debug(index == RA_INTER_RANGE_START || index == RA_INTER_RANGE_END || (index >= 0 && index < 0x100000*2)); // make sure index value is sane
- }
-
- // sint32 GetRaw()
- // {
- // this->index = index;
- // }
-
- std::string GetDebugString()
- {
- if(index == RA_INTER_RANGE_START)
- return "RA_START";
- else if(index == RA_INTER_RANGE_END)
- return "RA_END";
- std::string str = fmt::format("{}", GetInstructionIndex());
- if(IsOnInputEdge())
- str += "i";
- else if(IsOnOutputEdge())
- str += "o";
- return str;
- }
-
- sint32 GetInstructionIndex() const
- {
- cemu_assert_debug(index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END);
- return index >> 1;
- }
-
- // returns instruction index or RA_INTER_RANGE_START/RA_INTER_RANGE_END
- sint32 GetInstructionIndexEx() const
- {
- if(index == RA_INTER_RANGE_START || index == RA_INTER_RANGE_END)
- return index;
- return index >> 1;
- }
-
- sint32 GetRaw() const
- {
- return index;
- }
-
- bool IsOnInputEdge() const
- {
- cemu_assert_debug(index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END);
- return (index&1) == 0;
- }
-
- bool IsOnOutputEdge() const
- {
- cemu_assert_debug(index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END);
- return (index&1) != 0;
- }
-
- bool ConnectsToPreviousSegment() const
- {
- return index == RA_INTER_RANGE_START;
- }
-
- bool ConnectsToNextSegment() const
- {
- return index == RA_INTER_RANGE_END;
- }
-
- bool IsInstructionIndex() const
- {
- return index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END;
- }
-
- // comparison operators
- bool operator>(const raInstructionEdge& other) const
- {
- return index > other.index;
- }
- bool operator<(const raInstructionEdge& other) const
- {
- return index < other.index;
- }
- bool operator<=(const raInstructionEdge& other) const
- {
- return index <= other.index;
- }
- bool operator>=(const raInstructionEdge& other) const
- {
- return index >= other.index;
- }
- bool operator==(const raInstructionEdge& other) const
- {
- return index == other.index;
- }
-
- raInstructionEdge operator+(sint32 offset) const
- {
- cemu_assert_debug(IsInstructionIndex());
- cemu_assert_debug(offset >= 0 && offset < RA_INTER_RANGE_END);
- raInstructionEdge edge;
- edge.index = index + offset;
- return edge;
- }
-
- raInstructionEdge operator-(sint32 offset) const
- {
- cemu_assert_debug(IsInstructionIndex());
- cemu_assert_debug(offset >= 0 && offset < RA_INTER_RANGE_END);
- raInstructionEdge edge;
- edge.index = index - offset;
- return edge;
- }
-
- raInstructionEdge& operator++()
- {
- cemu_assert_debug(IsInstructionIndex());
- index++;
- return *this;
- }
-
-private:
- sint32 index; // can also be RA_INTER_RANGE_START or RA_INTER_RANGE_END, otherwise contains instruction index * 2
-
-};
-
-struct raAccessLocation
-{
- raAccessLocation(raInstructionEdge pos) : pos(pos) {}
-
- bool IsRead() const
- {
- return pos.IsOnInputEdge();
- }
-
- bool IsWrite() const
- {
- return pos.IsOnOutputEdge();
- }
-
- raInstructionEdge pos;
-};
-
-struct raInterval
-{
- raInterval()
- {
-
- }
-
- raInterval(raInstructionEdge start, raInstructionEdge end)
- {
- SetInterval(start, end);
- }
-
- // isStartOnInput = Input+Output edge on first instruction. If false then only output
- // isEndOnOutput = Input+Output edge on last instruction. If false then only input
- void SetInterval(sint32 start, bool isStartOnInput, sint32 end, bool isEndOnOutput)
- {
- this->start.Set(start, isStartOnInput);
- this->end.Set(end, !isEndOnOutput);
- }
-
- void SetInterval(raInstructionEdge start, raInstructionEdge end)
- {
- cemu_assert_debug(start <= end);
- this->start = start;
- this->end = end;
- }
-
- void SetStart(const raInstructionEdge& edge)
- {
- start = edge;
- }
-
- void SetEnd(const raInstructionEdge& edge)
- {
- end = edge;
- }
-
- sint32 GetStartIndex() const
- {
- return start.GetInstructionIndex();
- }
-
- sint32 GetEndIndex() const
- {
- return end.GetInstructionIndex();
- }
-
- bool ExtendsPreviousSegment() const
- {
- return start.ConnectsToPreviousSegment();
- }
-
- bool ExtendsIntoNextSegment() const
- {
- return end.ConnectsToNextSegment();
- }
-
- bool IsNextSegmentOnly() const
- {
- return start.ConnectsToNextSegment() && end.ConnectsToNextSegment();
- }
-
- bool IsPreviousSegmentOnly() const
- {
- return start.ConnectsToPreviousSegment() && end.ConnectsToPreviousSegment();
- }
-
- // returns true if range is contained within a single segment
- bool IsLocal() const
- {
- return start.GetRaw() > RA_INTER_RANGE_START && end.GetRaw() < RA_INTER_RANGE_END;
- }
-
- bool ContainsInstructionIndex(sint32 instructionIndex) const
- {
- cemu_assert_debug(instructionIndex != RA_INTER_RANGE_START && instructionIndex != RA_INTER_RANGE_END);
- return instructionIndex >= start.GetInstructionIndexEx() && instructionIndex <= end.GetInstructionIndexEx();
- }
-
- // similar to ContainsInstructionIndex, but allows RA_INTER_RANGE_START/END as input
- bool ContainsInstructionIndexEx(sint32 instructionIndex) const
- {
- if(instructionIndex == RA_INTER_RANGE_START)
- return start.ConnectsToPreviousSegment();
- if(instructionIndex == RA_INTER_RANGE_END)
- return end.ConnectsToNextSegment();
- return instructionIndex >= start.GetInstructionIndexEx() && instructionIndex <= end.GetInstructionIndexEx();
- }
-
- bool ContainsEdge(const raInstructionEdge& edge) const
- {
- return edge >= start && edge <= end;
- }
-
- bool ContainsWholeInterval(const raInterval& other) const
- {
- return other.start >= start && other.end <= end;
- }
-
- bool IsOverlapping(const raInterval& other) const
- {
- return start <= other.end && end >= other.start;
- }
-
- sint32 GetPreciseDistance()
- {
- cemu_assert_debug(!start.ConnectsToNextSegment()); // how to handle this?
- if(start == end)
- return 1;
- cemu_assert_debug(!end.ConnectsToPreviousSegment() && !end.ConnectsToNextSegment());
- if(start.ConnectsToPreviousSegment())
- return end.GetRaw() + 1;
-
- return end.GetRaw() - start.GetRaw() + 1; // +1 because end is inclusive
- }
-
-//private: not making these directly accessible only forces us to create loads of verbose getters and setters
- raInstructionEdge start;
- raInstructionEdge end;
-};
-
-struct raFixedRegRequirement
-{
- raInstructionEdge pos;
- IMLPhysRegisterSet allowedReg;
-};
-
-struct raLivenessRange
-{
- IMLSegment* imlSegment;
- raInterval interval;
-
- // dirty state tracking
- bool _noLoad;
- bool hasStore;
- bool hasStoreDelayed;
- // next
- raLivenessRange* subrangeBranchTaken;
- raLivenessRange* subrangeBranchNotTaken;
- // reverse counterpart of BranchTaken/BranchNotTaken
- boost::container::small_vector previousRanges;
- // processing
- uint32 lastIterationIndex;
- // instruction read/write locations
- std::vector list_accessLocations;
- // ordered list of all raInstructionEdge indices which require a fixed register
- std::vector list_fixedRegRequirements;
- // linked list (subranges with same GPR virtual register)
- raLivenessSubrangeLink link_sameVirtualRegister;
- // linked list (all subranges for this segment)
- raLivenessSubrangeLink link_allSegmentRanges;
- // register info
- IMLRegID virtualRegister;
- IMLName name;
- // register allocator result
- IMLPhysReg physicalRegister;
-
- boost::container::small_vector GetAllSubrangesInCluster();
- bool GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters); // if the cluster has fixed register requirements in any instruction this returns the combined register mask. Otherwise returns false in which case allowedRegisters is left undefined
- IMLPhysRegisterSet GetAllowedRegisters(IMLPhysRegisterSet regPool); // return regPool with fixed register requirements filtered out
-
- IMLRegID GetVirtualRegister() const;
- sint32 GetPhysicalRegister() const;
- bool HasPhysicalRegister() const { return physicalRegister >= 0; }
- IMLName GetName() const;
- void SetPhysicalRegister(IMLPhysReg physicalRegister);
- void SetPhysicalRegisterForCluster(IMLPhysReg physicalRegister);
- void UnsetPhysicalRegister() { physicalRegister = -1; }
-
- private:
- void GetAllowedRegistersExRecursive(raLivenessRange* range, uint32 iterationIndex, IMLPhysRegisterSet& allowedRegs);
-};
-
-raLivenessRange* IMLRA_CreateRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition);
-void IMLRA_DeleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange);
-void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext);
-
-void IMLRA_ExplodeRangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange);
-
-void IMLRA_MergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange);
-
-raLivenessRange* IMLRA_SplitRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToUsage = false);
-
-void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange);
-
-// cost estimation
-sint32 IMLRA_GetSegmentReadWriteCost(IMLSegment* imlSegment);
-sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange);
-//sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, sint32 splitIndex);
-sint32 IMLRA_CalculateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition);
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp
deleted file mode 100644
index f3b6834f..00000000
--- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-#include "IMLInstruction.h"
-#include "IMLSegment.h"
-
-void IMLSegment::SetEnterable(uint32 enterAddress)
-{
- cemu_assert_debug(!isEnterable || enterPPCAddress == enterAddress);
- isEnterable = true;
- enterPPCAddress = enterAddress;
-}
-
-bool IMLSegment::HasSuffixInstruction() const
-{
- if (imlList.empty())
- return false;
- const IMLInstruction& imlInstruction = imlList.back();
- return imlInstruction.IsSuffixInstruction();
-}
-
-sint32 IMLSegment::GetSuffixInstructionIndex() const
-{
- cemu_assert_debug(HasSuffixInstruction());
- return (sint32)(imlList.size() - 1);
-}
-
-IMLInstruction* IMLSegment::GetLastInstruction()
-{
- if (imlList.empty())
- return nullptr;
- return &imlList.back();
-}
-
-void IMLSegment::SetLinkBranchNotTaken(IMLSegment* imlSegmentDst)
-{
- if (nextSegmentBranchNotTaken)
- nextSegmentBranchNotTaken->list_prevSegments.erase(std::find(nextSegmentBranchNotTaken->list_prevSegments.begin(), nextSegmentBranchNotTaken->list_prevSegments.end(), this));
- nextSegmentBranchNotTaken = imlSegmentDst;
- if(imlSegmentDst)
- imlSegmentDst->list_prevSegments.push_back(this);
-}
-
-void IMLSegment::SetLinkBranchTaken(IMLSegment* imlSegmentDst)
-{
- if (nextSegmentBranchTaken)
- nextSegmentBranchTaken->list_prevSegments.erase(std::find(nextSegmentBranchTaken->list_prevSegments.begin(), nextSegmentBranchTaken->list_prevSegments.end(), this));
- nextSegmentBranchTaken = imlSegmentDst;
- if (imlSegmentDst)
- imlSegmentDst->list_prevSegments.push_back(this);
-}
-
-IMLInstruction* IMLSegment::AppendInstruction()
-{
- IMLInstruction& inst = imlList.emplace_back();
- memset(&inst, 0, sizeof(IMLInstruction));
- return &inst;
-}
-
-void IMLSegment_SetLinkBranchNotTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst)
-{
- // make sure segments aren't already linked
- if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst)
- return;
- // add as next segment for source
- if (imlSegmentSrc->nextSegmentBranchNotTaken != nullptr)
- assert_dbg();
- imlSegmentSrc->nextSegmentBranchNotTaken = imlSegmentDst;
- // add as previous segment for destination
- imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc);
-}
-
-void IMLSegment_SetLinkBranchTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst)
-{
- // make sure segments aren't already linked
- if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst)
- return;
- // add as next segment for source
- if (imlSegmentSrc->nextSegmentBranchTaken != nullptr)
- assert_dbg();
- imlSegmentSrc->nextSegmentBranchTaken = imlSegmentDst;
- // add as previous segment for destination
- imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc);
-}
-
-void IMLSegment_RemoveLink(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst)
-{
- if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst)
- {
- imlSegmentSrc->nextSegmentBranchNotTaken = nullptr;
- }
- else if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst)
- {
- imlSegmentSrc->nextSegmentBranchTaken = nullptr;
- }
- else
- assert_dbg();
-
- bool matchFound = false;
- for (sint32 i = 0; i < imlSegmentDst->list_prevSegments.size(); i++)
- {
- if (imlSegmentDst->list_prevSegments[i] == imlSegmentSrc)
- {
- imlSegmentDst->list_prevSegments.erase(imlSegmentDst->list_prevSegments.begin() + i);
- matchFound = true;
- break;
- }
- }
- if (matchFound == false)
- assert_dbg();
-}
-
-/*
- * Replaces all links to segment orig with linkts to segment new
- */
-void IMLSegment_RelinkInputSegment(IMLSegment* imlSegmentOrig, IMLSegment* imlSegmentNew)
-{
- while (imlSegmentOrig->list_prevSegments.size() != 0)
- {
- IMLSegment* prevSegment = imlSegmentOrig->list_prevSegments[0];
- if (prevSegment->nextSegmentBranchNotTaken == imlSegmentOrig)
- {
- IMLSegment_RemoveLink(prevSegment, imlSegmentOrig);
- IMLSegment_SetLinkBranchNotTaken(prevSegment, imlSegmentNew);
- }
- else if (prevSegment->nextSegmentBranchTaken == imlSegmentOrig)
- {
- IMLSegment_RemoveLink(prevSegment, imlSegmentOrig);
- IMLSegment_SetLinkBranchTaken(prevSegment, imlSegmentNew);
- }
- else
- {
- assert_dbg();
- }
- }
-}
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h
deleted file mode 100644
index 10e3dc06..00000000
--- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h
+++ /dev/null
@@ -1,193 +0,0 @@
-#pragma once
-#include "IMLInstruction.h"
-
-#include
-
-// special values to mark the index of ranges that reach across the segment border
-#define RA_INTER_RANGE_START (-1)
-#define RA_INTER_RANGE_END (0x70000000)
-
-struct IMLSegmentPoint
-{
- friend struct IMLSegmentInterval;
-
- sint32 index;
- struct IMLSegment* imlSegment; // do we really need to track this? SegmentPoints are always accessed via the segment that they are part of
- IMLSegmentPoint* next;
- IMLSegmentPoint* prev;
-
- // the index is the instruction index times two.
- // this gives us the ability to cover half an instruction with RA ranges
- // covering only the first half of an instruction (0-0) means that the register is read, but not preserved
- // covering first and the second half means the register is read and preserved
- // covering only the second half means the register is written but not read
-
- sint32 GetInstructionIndex() const
- {
- return index;
- }
-
- void SetInstructionIndex(sint32 index)
- {
- this->index = index;
- }
-
- void ShiftIfAfter(sint32 instructionIndex, sint32 shiftCount)
- {
- if (!IsPreviousSegment() && !IsNextSegment())
- {
- if (GetInstructionIndex() >= instructionIndex)
- index += shiftCount;
- }
- }
-
- void DecrementByOneInstruction()
- {
- index--;
- }
-
- // the segment point can point beyond the first and last instruction which indicates that it is an infinite range reaching up to the previous or next segment
- bool IsPreviousSegment() const { return index == RA_INTER_RANGE_START; }
- bool IsNextSegment() const { return index == RA_INTER_RANGE_END; }
-
- // overload operand > and <
- bool operator>(const IMLSegmentPoint& other) const { return index > other.index; }
- bool operator<(const IMLSegmentPoint& other) const { return index < other.index; }
- bool operator==(const IMLSegmentPoint& other) const { return index == other.index; }
- bool operator!=(const IMLSegmentPoint& other) const { return index != other.index; }
-
- // overload comparison operands for sint32
- bool operator>(const sint32 other) const { return index > other; }
- bool operator<(const sint32 other) const { return index < other; }
- bool operator<=(const sint32 other) const { return index <= other; }
- bool operator>=(const sint32 other) const { return index >= other; }
-};
-
-struct IMLSegmentInterval
-{
- IMLSegmentPoint start;
- IMLSegmentPoint end;
-
- bool ContainsInstructionIndex(sint32 offset) const { return start <= offset && end > offset; }
-
- bool IsRangeOverlapping(const IMLSegmentInterval& other)
- {
- // todo - compare the raw index
- sint32 r1start = this->start.GetInstructionIndex();
- sint32 r1end = this->end.GetInstructionIndex();
- sint32 r2start = other.start.GetInstructionIndex();
- sint32 r2end = other.end.GetInstructionIndex();
- if (r1start < r2end && r1end > r2start)
- return true;
- if (this->start.IsPreviousSegment() && r1start == r2start)
- return true;
- if (this->end.IsNextSegment() && r1end == r2end)
- return true;
- return false;
- }
-
- bool ExtendsIntoPreviousSegment() const
- {
- return start.IsPreviousSegment();
- }
-
- bool ExtendsIntoNextSegment() const
- {
- return end.IsNextSegment();
- }
-
- bool IsNextSegmentOnly() const
- {
- if(!start.IsNextSegment())
- return false;
- cemu_assert_debug(end.IsNextSegment());
- return true;
- }
-
- bool IsPreviousSegmentOnly() const
- {
- if (!end.IsPreviousSegment())
- return false;
- cemu_assert_debug(start.IsPreviousSegment());
- return true;
- }
-
- sint32 GetDistance() const
- {
- // todo - assert if either start or end is outside the segment
- // we may also want to switch this to raw indices?
- return end.GetInstructionIndex() - start.GetInstructionIndex();
- }
-};
-
-struct PPCSegmentRegisterAllocatorInfo_t
-{
- // used during loop detection
- bool isPartOfProcessedLoop{};
- sint32 lastIterationIndex{};
- // linked lists
- struct raLivenessRange* linkedList_allSubranges{};
- std::unordered_map linkedList_perVirtualRegister;
-};
-
-struct IMLSegment
-{
- sint32 momentaryIndex{}; // index in segment list, generally not kept up to date except if needed (necessary for loop detection)
- sint32 loopDepth{};
- uint32 ppcAddress{}; // ppc address (0xFFFFFFFF if not associated with an address)
- uint32 x64Offset{}; // x64 code offset of segment start
- // list of intermediate instructions in this segment
- std::vector imlList;
- // segment link
- IMLSegment* nextSegmentBranchNotTaken{}; // this is also the default for segments where there is no branch
- IMLSegment* nextSegmentBranchTaken{};
- bool nextSegmentIsUncertain{};
- std::vector list_prevSegments{};
- // source for overwrite analysis (if nextSegmentIsUncertain is true)
- // sometimes a segment is marked as an exit point, but for the purposes of dead code elimination we know the next segment
- IMLSegment* deadCodeEliminationHintSeg{};
- std::vector list_deadCodeHintBy{};
- // enterable segments
- bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary)
- uint32 enterPPCAddress{}; // used if isEnterable is true
- // register allocator info
- PPCSegmentRegisterAllocatorInfo_t raInfo{};
- // segment state API
- void SetEnterable(uint32 enterAddress);
- void SetLinkBranchNotTaken(IMLSegment* imlSegmentDst);
- void SetLinkBranchTaken(IMLSegment* imlSegmentDst);
-
- IMLSegment* GetBranchTaken()
- {
- return nextSegmentBranchTaken;
- }
-
- IMLSegment* GetBranchNotTaken()
- {
- return nextSegmentBranchNotTaken;
- }
-
- void SetNextSegmentForOverwriteHints(IMLSegment* seg)
- {
- cemu_assert_debug(!deadCodeEliminationHintSeg);
- deadCodeEliminationHintSeg = seg;
- if (seg)
- seg->list_deadCodeHintBy.push_back(this);
- }
-
- // instruction API
- IMLInstruction* AppendInstruction();
-
- bool HasSuffixInstruction() const;
- sint32 GetSuffixInstructionIndex() const;
- IMLInstruction* GetLastInstruction();
-
- // segment points
- IMLSegmentPoint* segmentPointList{};
-};
-
-
-void IMLSegment_SetLinkBranchNotTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst);
-void IMLSegment_SetLinkBranchTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst);
-void IMLSegment_RelinkInputSegment(IMLSegment* imlSegmentOrig, IMLSegment* imlSegmentNew);
-void IMLSegment_RemoveLink(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst);
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h b/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h
index 96b5143e..e558292b 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h
+++ b/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h
@@ -21,16 +21,6 @@ public:
};
public:
- ~PPCFunctionBoundaryTracker()
- {
- while (!map_ranges.empty())
- {
- PPCRange_t* range = *map_ranges.begin();
- delete range;
- map_ranges.erase(map_ranges.begin());
- }
- }
-
void trackStartPoint(MPTR startAddress)
{
processRange(startAddress, nullptr, nullptr);
@@ -50,34 +40,10 @@ public:
return false;
}
- std::vector GetRanges()
- {
- std::vector r;
- for (auto& it : map_ranges)
- r.emplace_back(*it);
- return r;
- }
-
- bool ContainsAddress(uint32 addr) const
- {
- for (auto& it : map_ranges)
- {
- if (addr >= it->startAddress && addr < it->getEndAddress())
- return true;
- }
- return false;
- }
-
- const std::set& GetBranchTargets() const
- {
- return map_branchTargetsAll;
- }
-
private:
void addBranchDestination(PPCRange_t* sourceRange, MPTR address)
{
- map_queuedBranchTargets.emplace(address);
- map_branchTargetsAll.emplace(address);
+ map_branchTargets.emplace(address);
}
// process flow of instruction
@@ -148,7 +114,7 @@ private:
Espresso::BOField BO;
uint32 BI;
bool LK;
- Espresso::decodeOp_BCSPR(opcode, BO, BI, LK);
+ Espresso::decodeOp_BCLR(opcode, BO, BI, LK);
if (BO.branchAlways() && !LK)
{
// unconditional BLR
@@ -252,7 +218,7 @@ private:
auto rangeItr = map_ranges.begin();
PPCRange_t* previousRange = nullptr;
- for (std::set::const_iterator targetItr = map_queuedBranchTargets.begin() ; targetItr != map_queuedBranchTargets.end(); )
+ for (std::set::const_iterator targetItr = map_branchTargets.begin() ; targetItr != map_branchTargets.end(); )
{
while (rangeItr != map_ranges.end() && ((*rangeItr)->startAddress + (*rangeItr)->length) <= (*targetItr))
{
@@ -273,7 +239,7 @@ private:
(*targetItr) < ((*rangeItr)->startAddress + (*rangeItr)->length))
{
// delete visited targets
- targetItr = map_queuedBranchTargets.erase(targetItr);
+ targetItr = map_branchTargets.erase(targetItr);
continue;
}
@@ -323,6 +289,5 @@ private:
};
std::set map_ranges;
- std::set map_queuedBranchTargets;
- std::set map_branchTargetsAll;
+ std::set map_branchTargets;
};
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp
index 6125c7da..24e87bd1 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp
+++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp
@@ -2,6 +2,7 @@
#include "PPCFunctionBoundaryTracker.h"
#include "PPCRecompiler.h"
#include "PPCRecompilerIml.h"
+#include "PPCRecompilerX64.h"
#include "Cafe/OS/RPL/rpl.h"
#include "util/containers/RangeStore.h"
#include "Cafe/OS/libs/coreinit/coreinit_CodeGen.h"
@@ -13,17 +14,6 @@
#include "util/helpers/helpers.h"
#include "util/MemMapper/MemMapper.h"
-#include "IML/IML.h"
-#include "IML/IMLRegisterAllocator.h"
-#include "BackendX64/BackendX64.h"
-#ifdef __aarch64__
-#include "BackendAArch64/BackendAArch64.h"
-#endif
-#include "util/highresolutiontimer/HighResolutionTimer.h"
-
-#define PPCREC_FORCE_SYNCHRONOUS_COMPILATION 0 // if 1, then function recompilation will block and execute on the thread that called PPCRecompiler_visitAddressNoBlock
-#define PPCREC_LOG_RECOMPILATION_RESULTS 0
-
struct PPCInvalidationRange
{
MPTR startAddress;
@@ -47,36 +37,11 @@ void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_unvisited)();
PPCRecompilerInstanceData_t* ppcRecompilerInstanceData;
-#if PPCREC_FORCE_SYNCHRONOUS_COMPILATION
-static std::mutex s_singleRecompilationMutex;
-#endif
-
bool ppcRecompilerEnabled = false;
-void PPCRecompiler_recompileAtAddress(uint32 address);
-
// this function does never block and can fail if the recompiler lock cannot be acquired immediately
void PPCRecompiler_visitAddressNoBlock(uint32 enterAddress)
{
-#if PPCREC_FORCE_SYNCHRONOUS_COMPILATION
- if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] != PPCRecompiler_leaveRecompilerCode_unvisited)
- return;
- PPCRecompilerState.recompilerSpinlock.lock();
- if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] != PPCRecompiler_leaveRecompilerCode_unvisited)
- {
- PPCRecompilerState.recompilerSpinlock.unlock();
- return;
- }
- ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] = PPCRecompiler_leaveRecompilerCode_visited;
- PPCRecompilerState.recompilerSpinlock.unlock();
- s_singleRecompilationMutex.lock();
- if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] == PPCRecompiler_leaveRecompilerCode_visited)
- {
- PPCRecompiler_recompileAtAddress(enterAddress);
- }
- s_singleRecompilationMutex.unlock();
- return;
-#endif
// quick read-only check without lock
if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] != PPCRecompiler_leaveRecompilerCode_unvisited)
return;
@@ -162,15 +127,15 @@ void PPCRecompiler_attemptEnter(PPCInterpreter_t* hCPU, uint32 enterAddress)
PPCRecompiler_enter(hCPU, funcPtr);
}
}
-bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext);
-PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set& entryAddresses, std::vector>& entryPointsOut, PPCFunctionBoundaryTracker& boundaryTracker)
+PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set& entryAddresses, std::vector>& entryPointsOut)
{
if (range.startAddress >= PPC_REC_CODE_AREA_END)
{
cemuLog_log(LogType::Force, "Attempting to recompile function outside of allowed code area");
return nullptr;
}
+
uint32 codeGenRangeStart;
uint32 codeGenRangeSize = 0;
coreinit::OSGetCodegenVirtAddrRangeInternal(codeGenRangeStart, codeGenRangeSize);
@@ -188,69 +153,29 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
PPCRecFunction_t* ppcRecFunc = new PPCRecFunction_t();
ppcRecFunc->ppcAddress = range.startAddress;
ppcRecFunc->ppcSize = range.length;
-
-#if PPCREC_LOG_RECOMPILATION_RESULTS
- BenchmarkTimer bt;
- bt.Start();
-#endif
-
// generate intermediate code
ppcImlGenContext_t ppcImlGenContext = { 0 };
- ppcImlGenContext.debug_entryPPCAddress = range.startAddress;
- bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses, boundaryTracker);
+ bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses);
if (compiledSuccessfully == false)
{
+ // todo: Free everything
+ PPCRecompiler_freeContext(&ppcImlGenContext);
delete ppcRecFunc;
- return nullptr;
+ return NULL;
}
-
- uint32 ppcRecLowerAddr = LaunchSettings::GetPPCRecLowerAddr();
- uint32 ppcRecUpperAddr = LaunchSettings::GetPPCRecUpperAddr();
-
- if (ppcRecLowerAddr != 0 && ppcRecUpperAddr != 0)
- {
- if (ppcRecFunc->ppcAddress < ppcRecLowerAddr || ppcRecFunc->ppcAddress > ppcRecUpperAddr)
- {
- delete ppcRecFunc;
- return nullptr;
- }
- }
-
- // apply passes
- if (!PPCRecompiler_ApplyIMLPasses(ppcImlGenContext))
- {
- delete ppcRecFunc;
- return nullptr;
- }
-
-#if defined(ARCH_X86_64)
// emit x64 code
bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext);
if (x64GenerationSuccess == false)
{
+ PPCRecompiler_freeContext(&ppcImlGenContext);
return nullptr;
}
-#elif defined(__aarch64__)
- bool aarch64GenerationSuccess = PPCRecompiler_generateAArch64Code(ppcRecFunc, &ppcImlGenContext);
- if (aarch64GenerationSuccess == false)
- {
- return nullptr;
- }
-#endif
- if (ActiveSettings::DumpRecompilerFunctionsEnabled())
- {
- FileStream* fs = FileStream::createFile2(ActiveSettings::GetUserDataPath(fmt::format("dump/recompiler/ppc_{:08x}.bin", ppcRecFunc->ppcAddress)));
- if (fs)
- {
- fs->writeData(ppcRecFunc->x86Code, ppcRecFunc->x86Size);
- delete fs;
- }
- }
// collect list of PPC-->x64 entry points
entryPointsOut.clear();
- for(IMLSegment* imlSegment : ppcImlGenContext.segmentList2)
+ for (sint32 s = 0; s < ppcImlGenContext.segmentListCount; s++)
{
+ PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s];
if (imlSegment->isEnterable == false)
continue;
@@ -260,94 +185,10 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
entryPointsOut.emplace_back(ppcEnterOffset, x64Offset);
}
-#if PPCREC_LOG_RECOMPILATION_RESULTS
- bt.Stop();
- uint32 codeHash = 0;
- for (uint32 i = 0; i < ppcRecFunc->x86Size; i++)
- {
- codeHash = _rotr(codeHash, 3);
- codeHash += ((uint8*)ppcRecFunc->x86Code)[i];
- }
- cemuLog_log(LogType::Force, "[Recompiler] PPC 0x{:08x} -> x64: 0x{:x} Took {:.4}ms | Size {:04x} CodeHash {:08x}", (uint32)ppcRecFunc->ppcAddress, (uint64)(uintptr_t)ppcRecFunc->x86Code, bt.GetElapsedMilliseconds(), ppcRecFunc->x86Size, codeHash);
-#endif
-
+ PPCRecompiler_freeContext(&ppcImlGenContext);
return ppcRecFunc;
}
-void PPCRecompiler_NativeRegisterAllocatorPass(ppcImlGenContext_t& ppcImlGenContext)
-{
- IMLRegisterAllocatorParameters raParam;
-
- for (auto& it : ppcImlGenContext.mappedRegs)
- raParam.regIdToName.try_emplace(it.second.GetRegID(), it.first);
-
-#if defined(ARCH_X86_64)
- auto& gprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::I64);
- gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RAX);
- gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX);
- gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RBX);
- gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RBP);
- gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RSI);
- gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDI);
- gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8);
- gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R9);
- gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R10);
- gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R11);
- gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R12);
- gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX);
-
- // add XMM registers, except XMM15 which is the temporary register
- auto& fprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::F64);
- fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 0);
- fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 1);
- fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 2);
- fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 3);
- fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 4);
- fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 5);
- fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 6);
- fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 7);
- fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 8);
- fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 9);
- fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 10);
- fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 11);
- fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 12);
- fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 13);
- fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 14);
-#elif defined(__aarch64__)
- auto& gprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::I64);
- for (auto i = IMLArchAArch64::PHYSREG_GPR_BASE; i < IMLArchAArch64::PHYSREG_GPR_BASE + IMLArchAArch64::PHYSREG_GPR_COUNT; i++)
- {
- if (i == IMLArchAArch64::PHYSREG_GPR_BASE + 18)
- continue; // Skip reserved platform register
- gprPhysPool.SetAvailable(i);
- }
-
- auto& fprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::F64);
- for (auto i = IMLArchAArch64::PHYSREG_FPR_BASE; i < IMLArchAArch64::PHYSREG_FPR_BASE + IMLArchAArch64::PHYSREG_FPR_COUNT; i++)
- fprPhysPool.SetAvailable(i);
-#endif
-
- IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam);
-}
-
-bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext)
-{
- // isolate entry points from function flow (enterable segments must not be the target of any other segment)
- // this simplifies logic during register allocation
- PPCRecompilerIML_isolateEnterableSegments(&ppcImlGenContext);
-
- // merge certain float load+store patterns
- IMLOptimizer_OptimizeDirectFloatCopies(&ppcImlGenContext);
- // delay byte swapping for certain load+store patterns
- IMLOptimizer_OptimizeDirectIntegerCopies(&ppcImlGenContext);
-
- IMLOptimizer_StandardOptimizationPass(ppcImlGenContext);
-
- PPCRecompiler_NativeRegisterAllocatorPass(ppcImlGenContext);
-
- return true;
-}
-
bool PPCRecompiler_makeRecompiledFunctionActive(uint32 initialEntryPoint, PPCFunctionBoundaryTracker::PPCRange_t& range, PPCRecFunction_t* ppcRecFunc, std::vector>& entryPoints)
{
// update jump table
@@ -361,7 +202,7 @@ bool PPCRecompiler_makeRecompiledFunctionActive(uint32 initialEntryPoint, PPCFun
return false;
}
- // check if the current range got invalidated during the time it took to recompile it
+ // check if the current range got invalidated in the time it took to recompile it
bool isInvalidated = false;
for (auto& invRange : PPCRecompilerState.invalidationRanges)
{
@@ -439,7 +280,7 @@ void PPCRecompiler_recompileAtAddress(uint32 address)
PPCRecompilerState.recompilerSpinlock.unlock();
std::vector> functionEntryPoints;
- auto func = PPCRecompiler_recompileFunction(range, entryAddresses, functionEntryPoints, funcBoundaries);
+ auto func = PPCRecompiler_recompileFunction(range, entryAddresses, functionEntryPoints);
if (!func)
{
@@ -454,10 +295,6 @@ std::atomic_bool s_recompilerThreadStopSignal{false};
void PPCRecompiler_thread()
{
SetThreadName("PPCRecompiler");
-#if PPCREC_FORCE_SYNCHRONOUS_COMPILATION
- return;
-#endif
-
while (true)
{
if(s_recompilerThreadStopSignal)
@@ -638,6 +475,44 @@ void PPCRecompiler_invalidateRange(uint32 startAddr, uint32 endAddr)
#if defined(ARCH_X86_64)
void PPCRecompiler_initPlatform()
{
+ // mxcsr
+ ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOn = 0x1F80 | 0x8000;
+ ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOff = 0x1F80;
+}
+#else
+void PPCRecompiler_initPlatform()
+{
+
+}
+#endif
+
+void PPCRecompiler_init()
+{
+ if (ActiveSettings::GetCPUMode() == CPUMode::SinglecoreInterpreter)
+ {
+ ppcRecompilerEnabled = false;
+ return;
+ }
+ if (LaunchSettings::ForceInterpreter())
+ {
+ cemuLog_log(LogType::Force, "Recompiler disabled. Command line --force-interpreter was passed");
+ return;
+ }
+ if (ppcRecompilerInstanceData)
+ {
+ MemMapper::FreeReservation(ppcRecompilerInstanceData, sizeof(PPCRecompilerInstanceData_t));
+ ppcRecompilerInstanceData = nullptr;
+ }
+ debug_printf("Allocating %dMB for recompiler instance data...\n", (sint32)(sizeof(PPCRecompilerInstanceData_t) / 1024 / 1024));
+ ppcRecompilerInstanceData = (PPCRecompilerInstanceData_t*)MemMapper::ReserveMemory(nullptr, sizeof(PPCRecompilerInstanceData_t), MemMapper::PAGE_PERMISSION::P_RW);
+ MemMapper::AllocateMemory(&(ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom), sizeof(PPCRecompilerInstanceData_t) - offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom), MemMapper::PAGE_PERMISSION::P_RW, true);
+ PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions();
+
+ PPCRecompiler_allocateRange(0, 0x1000); // the first entry is used for fallback to interpreter
+ PPCRecompiler_allocateRange(mmuRange_TRAMPOLINE_AREA.getBase(), mmuRange_TRAMPOLINE_AREA.getSize());
+ PPCRecompiler_allocateRange(mmuRange_CODECAVE.getBase(), mmuRange_CODECAVE.getSize());
+
+ // init x64 recompiler instance data
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[0] = 1ULL << 63ULL;
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[1] = 0ULL;
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[0] = 1ULL << 63ULL;
@@ -673,45 +548,44 @@ void PPCRecompiler_initPlatform()
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[2] = ~0x80000000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[3] = ~0x80000000;
- // mxcsr
- ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOn = 0x1F80 | 0x8000;
- ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOff = 0x1F80;
-}
-#else
-void PPCRecompiler_initPlatform()
-{
-
-}
-#endif
+ // setup GQR scale tables
-void PPCRecompiler_init()
-{
- if (ActiveSettings::GetCPUMode() == CPUMode::SinglecoreInterpreter)
+ for (uint32 i = 0; i < 32; i++)
{
- ppcRecompilerEnabled = false;
- return;
+ float a = 1.0f / (float)(1u << i);
+ float b = 0;
+ if (i == 0)
+ b = 4294967296.0f;
+ else
+ b = (float)(1u << (32u - i));
+
+ float ar = (float)(1u << i);
+ float br = 0;
+ if (i == 0)
+ br = 1.0f / 4294967296.0f;
+ else
+ br = 1.0f / (float)(1u << (32u - i));
+
+ ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[i * 2 + 0] = a;
+ ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[i * 2 + 1] = 1.0f;
+ ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[(i + 32) * 2 + 0] = b;
+ ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[(i + 32) * 2 + 1] = 1.0f;
+
+ ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[i * 2 + 0] = a;
+ ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[i * 2 + 1] = a;
+ ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[(i + 32) * 2 + 0] = b;
+ ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[(i + 32) * 2 + 1] = b;
+
+ ppcRecompilerInstanceData->_psq_st_scale_ps0_1[i * 2 + 0] = ar;
+ ppcRecompilerInstanceData->_psq_st_scale_ps0_1[i * 2 + 1] = 1.0f;
+ ppcRecompilerInstanceData->_psq_st_scale_ps0_1[(i + 32) * 2 + 0] = br;
+ ppcRecompilerInstanceData->_psq_st_scale_ps0_1[(i + 32) * 2 + 1] = 1.0f;
+
+ ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[i * 2 + 0] = ar;
+ ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[i * 2 + 1] = ar;
+ ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[(i + 32) * 2 + 0] = br;
+ ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[(i + 32) * 2 + 1] = br;
}
- if (LaunchSettings::ForceInterpreter() || LaunchSettings::ForceMultiCoreInterpreter())
- {
- cemuLog_log(LogType::Force, "Recompiler disabled. Command line --force-interpreter or force-multicore-interpreter was passed");
- return;
- }
- if (ppcRecompilerInstanceData)
- {
- MemMapper::FreeReservation(ppcRecompilerInstanceData, sizeof(PPCRecompilerInstanceData_t));
- ppcRecompilerInstanceData = nullptr;
- }
- debug_printf("Allocating %dMB for recompiler instance data...\n", (sint32)(sizeof(PPCRecompilerInstanceData_t) / 1024 / 1024));
- ppcRecompilerInstanceData = (PPCRecompilerInstanceData_t*)MemMapper::ReserveMemory(nullptr, sizeof(PPCRecompilerInstanceData_t), MemMapper::PAGE_PERMISSION::P_RW);
- MemMapper::AllocateMemory(&(ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom), sizeof(PPCRecompilerInstanceData_t) - offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom), MemMapper::PAGE_PERMISSION::P_RW, true);
-#ifdef ARCH_X86_64
- PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions();
-#elif defined(__aarch64__)
- PPCRecompilerAArch64Gen_generateRecompilerInterfaceFunctions();
-#endif
- PPCRecompiler_allocateRange(0, 0x1000); // the first entry is used for fallback to interpreter
- PPCRecompiler_allocateRange(mmuRange_TRAMPOLINE_AREA.getBase(), mmuRange_TRAMPOLINE_AREA.getSize());
- PPCRecompiler_allocateRange(mmuRange_CODECAVE.getBase(), mmuRange_CODECAVE.getSize());
PPCRecompiler_initPlatform();
@@ -749,4 +623,4 @@ void PPCRecompiler_Shutdown()
// mark as unmapped
ppcRecompiler_reservedBlockMask[i] = false;
}
-}
+}
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h
index 47902630..2e40f19d 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h
+++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h
@@ -1,4 +1,4 @@
-#pragma once
+#include
#define PPC_REC_CODE_AREA_START (0x00000000) // lower bound of executable memory area. Recompiler expects this address to be 0
#define PPC_REC_CODE_AREA_END (0x10000000) // upper bound of executable memory area
@@ -6,113 +6,336 @@
#define PPC_REC_ALIGN_TO_4MB(__v) (((__v)+4*1024*1024-1)&~(4*1024*1024-1))
-#define PPC_REC_MAX_VIRTUAL_GPR (40 + 32) // enough to store 32 GPRs + a few SPRs + temp registers (usually only 1-2)
+#define PPC_REC_MAX_VIRTUAL_GPR (40) // enough to store 32 GPRs + a few SPRs + temp registers (usually only 1-2)
-struct ppcRecRange_t
+typedef struct
{
uint32 ppcAddress;
uint32 ppcSize;
+ //void* x86Start;
+ //size_t x86Size;
void* storedRange;
-};
+}ppcRecRange_t;
-struct PPCRecFunction_t
+typedef struct
{
uint32 ppcAddress; // start address of the function in PPC address space
uint32 ppcSize; // ppc code size of function
void* x86Code; // pointer to x86 code
size_t x86Size; // size of the code at x86Code (presumably in bytes - TODO confirm)
std::vector<ppcRecRange_t> list_ranges; // ranges (ppcRecRange_t) associated with this function; element type restored after template arguments were stripped from this listing
+}PPCRecFunction_t;
+
+#define PPCREC_IML_OP_FLAG_SIGNEXTEND (1<<0)
+#define PPCREC_IML_OP_FLAG_SWITCHENDIAN (1<<1)
+#define PPCREC_IML_OP_FLAG_NOT_EXPANDED (1<<2) // set single-precision load instructions to indicate that the value should not be rounded to double-precision
+#define PPCREC_IML_OP_FLAG_UNUSED (1<<7) // used to mark instructions that are not used
+
+typedef struct // one IML instruction: shared header fields followed by a union of per-form operand layouts
+{
+ uint8 type; // instruction form; presumably one of the PPCREC_IML_TYPE_* values - TODO confirm
+ uint8 operation; // presumably a PPCREC_IML_OP_* or PPCREC_IML_MACRO_* value depending on type - TODO confirm
+ uint8 crRegister; // set to 0xFF (PPC_REC_INVALID_REGISTER) if not set, not all IML instruction types support cr.
+ uint8 crMode; // only used when crRegister is valid, used to differentiate between various forms of condition flag set/clear behavior
+ uint32 crIgnoreMask; // bit set for every respective CR bit that doesn't need to be updated
+ uint32 associatedPPCAddress; // ppc address that is associated with this instruction
+ union // operand payload; all members overlap, presumably selected by 'type' - TODO confirm
+ {
+ struct
+ {
+ uint8 _padding[7];
+ }padding;
+ struct
+ {
+ // R (op) A [update cr* in mode *]
+ uint8 registerResult;
+ uint8 registerA;
+ }op_r_r;
+ struct
+ {
+ // R = A (op) B [update cr* in mode *]
+ uint8 registerResult;
+ uint8 registerA;
+ uint8 registerB;
+ }op_r_r_r;
+ struct
+ {
+ // R = A (op) immS32 [update cr* in mode *]
+ uint8 registerResult;
+ uint8 registerA;
+ sint32 immS32;
+ }op_r_r_s32;
+ struct
+ {
+ // R/F = NAME or NAME = R/F
+ uint8 registerIndex;
+ uint8 copyWidth;
+ uint32 name; // presumably a PPCREC_NAME_* based identifier - TODO confirm
+ uint8 flags;
+ }op_r_name;
+ struct
+ {
+ // R (op) s32 [update cr* in mode *]
+ uint8 registerIndex;
+ sint32 immS32;
+ }op_r_immS32;
+ struct
+ {
+ uint32 address;
+ uint8 flags;
+ }op_jumpmark;
+ struct
+ {
+ uint32 param;
+ uint32 param2;
+ uint16 paramU16;
+ }op_macro;
+ struct
+ {
+ uint32 jumpmarkAddress;
+ bool jumpAccordingToSegment; //PPCRecImlSegment_t* destinationSegment; // if set, this replaces jumpmarkAddress
+ uint8 condition; // only used when crRegisterIndex is 8 or above (update: Apparently only used to mark jumps without a condition? -> Cleanup)
+ uint8 crRegisterIndex;
+ uint8 crBitIndex;
+ bool bitMustBeSet;
+ }op_conditionalJump;
+ struct
+ {
+ uint8 registerData;
+ uint8 registerMem;
+ uint8 registerMem2;
+ uint8 registerGQR;
+ uint8 copyWidth;
+ //uint8 flags;
+ struct // one-bit flags; takes the place of the commented-out 'flags' byte above
+ {
+ bool swapEndian : 1;
+ bool signExtend : 1;
+ bool notExpanded : 1; // for floats
+ }flags2;
+ uint8 mode; // transfer mode (copy width, ps0/ps1 behavior)
+ sint32 immS32;
+ }op_storeLoad;
+ struct
+ {
+ struct
+ {
+ uint8 registerMem;
+ sint32 immS32;
+ }src;
+ struct
+ {
+ uint8 registerMem;
+ sint32 immS32;
+ }dst;
+ uint8 copyWidth;
+ }op_mem2mem;
+ struct
+ {
+ uint8 registerResult;
+ uint8 registerOperand;
+ uint8 flags;
+ }op_fpr_r_r;
+ struct
+ {
+ uint8 registerResult;
+ uint8 registerOperandA;
+ uint8 registerOperandB;
+ uint8 flags;
+ }op_fpr_r_r_r;
+ struct
+ {
+ uint8 registerResult;
+ uint8 registerOperandA;
+ uint8 registerOperandB;
+ uint8 registerOperandC;
+ uint8 flags;
+ }op_fpr_r_r_r_r;
+ struct
+ {
+ uint8 registerResult;
+ //uint8 flags;
+ }op_fpr_r;
+ struct
+ {
+ uint32 ppcAddress;
+ uint32 x64Offset;
+ }op_ppcEnter;
+ struct
+ {
+ uint8 crD; // crBitIndex (result)
+ uint8 crA; // crBitIndex
+ uint8 crB; // crBitIndex
+ }op_cr;
+ // conditional operations (emitted if supported by target platform)
+ struct
+ {
+ // r_s32
+ uint8 registerIndex;
+ sint32 immS32;
+ // condition
+ uint8 crRegisterIndex;
+ uint8 crBitIndex;
+ bool bitMustBeSet;
+ }op_conditional_r_s32;
+ };
+}PPCRecImlInstruction_t;
+
+typedef struct _PPCRecImlSegment_t PPCRecImlSegment_t;
+
+typedef struct _ppcRecompilerSegmentPoint_t
+{
+ sint32 index;
+ PPCRecImlSegment_t* imlSegment;
+ _ppcRecompilerSegmentPoint_t* next;
+ _ppcRecompilerSegmentPoint_t* prev;
+}ppcRecompilerSegmentPoint_t;
+
+struct raLivenessLocation_t // one recorded access: an index plus read/write flags
+{
+ sint32 index; // presumably an instruction index within a segment - TODO confirm
+ bool isRead;
+ bool isWrite;
+
+ raLivenessLocation_t() = default; // members have no default initializers, so a default-initialized object holds indeterminate values
+
+ raLivenessLocation_t(sint32 index, bool isRead, bool isWrite)
+ : index(index), isRead(isRead), isWrite(isWrite) {}; // member-wise copy of the arguments (the trailing ';' is redundant)
};
-#include "Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h"
-#include "Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h"
+struct raLivenessSubrangeLink_t // prev/next pointer pair; raLivenessSubrange_t embeds one of these per linked list it is chained into
+{
+ struct raLivenessSubrange_t* prev;
+ struct raLivenessSubrange_t* next;
+};
-struct IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(struct ppcImlGenContext_t* ppcImlGenContext);
+struct raLivenessSubrange_t // part of a liveness range that lies inside a single IML segment
+{
+ struct raLivenessRange_t* range; // range this subrange belongs to
+ PPCRecImlSegment_t* imlSegment; // segment this subrange is located in
+ ppcRecompilerSegmentPoint_t start;
+ ppcRecompilerSegmentPoint_t end;
+ // dirty state tracking
+ bool _noLoad;
+ bool hasStore;
+ bool hasStoreDelayed;
+ // next
+ raLivenessSubrange_t* subrangeBranchTaken;
+ raLivenessSubrange_t* subrangeBranchNotTaken;
+ // processing
+ uint32 lastIterationIndex;
+ // instruction locations
+ std::vector<raLivenessLocation_t> list_locations; // element type restored; the template argument had been stripped from this listing
+ // linked list (subranges with same GPR virtual register)
+ raLivenessSubrangeLink_t link_sameVirtualRegisterGPR;
+ // linked list (all subranges for this segment)
+ raLivenessSubrangeLink_t link_segmentSubrangesGPR;
+};
+
+struct raLivenessRange_t // liveness range of one virtual register, made up of per-segment subranges
+{
+ sint32 virtualRegister;
+ sint32 physicalRegister;
+ sint32 name; // presumably a PPCREC_NAME_* based identifier - TODO confirm
+ std::vector<raLivenessSubrange_t*> list_subranges; // element type restored (stripped from this listing); pointers, matching raLivenessSubrange_t::range pointing back here
+};
+
+struct PPCSegmentRegisterAllocatorInfo_t // per-segment register allocator state; every member is value-initialized
+{
+ // analyzer stage
+ bool isPartOfProcessedLoop{}; // used during loop detection
+ sint32 lastIterationIndex{};
+ // linked lists
+ raLivenessSubrange_t* linkedList_allSubranges{}; // presumably the head of the list chained via link_segmentSubrangesGPR - TODO confirm
+ raLivenessSubrange_t* linkedList_perVirtualGPR[PPC_REC_MAX_VIRTUAL_GPR]{}; // one list head per virtual GPR; presumably chained via link_sameVirtualRegisterGPR - TODO confirm
+};
+
+struct PPCRecVGPRDistances_t // per-virtual-GPR usage bounds plus a processed flag, both sized PPC_REC_MAX_VIRTUAL_GPR
+{
+ struct _RegArrayEntry
+ {
+ sint32 usageStart{}; // presumably the first index at which the register is used - TODO confirm
+ sint32 usageEnd{}; // presumably the last index at which the register is used - TODO confirm
+ }reg[PPC_REC_MAX_VIRTUAL_GPR];
+ bool isProcessed[PPC_REC_MAX_VIRTUAL_GPR]{};
+};
+
+typedef struct _PPCRecImlSegment_t // one IML segment: its instruction list, links to neighbouring segments and per-segment analysis data
+{
+ sint32 momentaryIndex{}; // index in segment list, generally not kept up to date except if needed (necessary for loop detection)
+ sint32 startOffset{}; // offset to first instruction in iml instruction list
+ sint32 count{}; // number of instructions in segment
+ uint32 ppcAddress{}; // ppc address (0xFFFFFFFF if not associated with an address)
+ uint32 x64Offset{}; // x64 code offset of segment start
+ uint32 cycleCount{}; // number of PPC cycles required to execute this segment (roughly)
+ // list of intermediate instructions in this segment
+ PPCRecImlInstruction_t* imlList{};
+ sint32 imlListSize{}; // presumably the allocated capacity of imlList - TODO confirm
+ sint32 imlListCount{}; // presumably the number of used entries in imlList - TODO confirm
+ // segment link
+ _PPCRecImlSegment_t* nextSegmentBranchNotTaken{}; // this is also the default for segments where there is no branch
+ _PPCRecImlSegment_t* nextSegmentBranchTaken{};
+ bool nextSegmentIsUncertain{};
+ sint32 loopDepth{};
+ //sList_t* list_prevSegments;
+ std::vector<_PPCRecImlSegment_t*> list_prevSegments{}; // predecessor segments (replaces the commented-out sList_t variant above)
+ // PPC range of segment
+ uint32 ppcAddrMin{};
+ uint32 ppcAddrMax{};
+ // enterable segments
+ bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary)
+ uint32 enterPPCAddress{}; // used if isEnterable is true
+ // jump destination segments
+ bool isJumpDestination{}; // segment is a destination for one or more (conditional) jumps
+ uint32 jumpDestinationPPCAddress{};
+ // PPC FPR use mask
+ bool ppcFPRUsed[32]{}; // one flag per FPR; NOTE(review): this comment used to reference a ppcGPRUsed field, which is not declared in this struct
+ // CR use mask
+ uint32 crBitsInput{}; // bits that are expected to be set from the previous segment (read in this segment but not overwritten)
+ uint32 crBitsRead{}; // all bits that are read in this segment
+ uint32 crBitsWritten{}; // bits that are written in this segment
+ // register allocator info
+ PPCSegmentRegisterAllocatorInfo_t raInfo{};
+ PPCRecVGPRDistances_t raDistances{};
+ bool raRangeExtendProcessed{};
+ // segment points
+ ppcRecompilerSegmentPoint_t* segmentPointList{}; // presumably the head of the prev/next-linked ppcRecompilerSegmentPoint_t list - TODO confirm
+}PPCRecImlSegment_t;
struct ppcImlGenContext_t
{
- class PPCFunctionBoundaryTracker* boundaryTracker;
+ PPCRecFunction_t* functionRef; // function this context is associated with
uint32* currentInstruction;
uint32 ppcAddressOfCurrentInstruction;
- IMLSegment* currentOutputSegment;
- struct PPCBasicBlockInfo* currentBasicBlock{};
// fpr mode
bool LSQE{ true };
bool PSE{ true };
// cycle counter
uint32 cyclesSinceLastBranch; // used to track ppc cycles
- std::unordered_map mappedRegs;
-
- uint32 GetMaxRegId() const
- {
- if (mappedRegs.empty())
- return 0;
- return mappedRegs.size()-1;
- }
-
+ // temporary general purpose registers
+ uint32 mappedRegister[PPC_REC_MAX_VIRTUAL_GPR]; // one slot per virtual GPR; presumably holds the mapped PPCREC_NAME_* value - TODO confirm
+ // temporary floating point registers (single and double precision)
+ uint32 mappedFPRRegister[256];
+ // list of intermediate instructions
+ PPCRecImlInstruction_t* imlList;
+ sint32 imlListSize; // presumably the allocated capacity of imlList - TODO confirm
+ sint32 imlListCount; // presumably the number of used entries in imlList - TODO confirm
// list of segments
- std::vector<IMLSegment*> segmentList2;
+ PPCRecImlSegment_t** segmentList; // raw array of segment pointers; this struct has no destructor (see PPCRecompiler_freeContext in PPCRecompilerIml.h)
+ sint32 segmentListSize;
+ sint32 segmentListCount;
// code generation control
bool hasFPUInstruction; // if true, PPCEnter macro will create FP_UNAVAIL checks -> Not needed in user mode
+ // register allocator info
+ struct
+ {
+ std::vector<raLivenessRange_t*> list_ranges; // element type restored; the template argument had been stripped from this listing
+ }raInfo;
// analysis info
struct
{
bool modifiesGQR[8];
}tracking;
- // debug helpers
- uint32 debug_entryPPCAddress{0};
-
- ~ppcImlGenContext_t()
- {
- for (IMLSegment* imlSegment : segmentList2)
- delete imlSegment;
- segmentList2.clear();
- }
-
- // append raw instruction
- IMLInstruction& emitInst()
- {
- return *PPCRecompilerImlGen_generateNewEmptyInstruction(this);
- }
-
- IMLSegment* NewSegment()
- {
- IMLSegment* seg = new IMLSegment();
- segmentList2.emplace_back(seg);
- return seg;
- }
-
- size_t GetSegmentIndex(IMLSegment* seg)
- {
- for (size_t i = 0; i < segmentList2.size(); i++)
- {
- if (segmentList2[i] == seg)
- return i;
- }
- cemu_assert_error();
- return 0;
- }
-
- IMLSegment* InsertSegment(size_t index)
- {
- IMLSegment* newSeg = new IMLSegment();
- segmentList2.insert(segmentList2.begin() + index, 1, newSeg);
- return newSeg;
- }
-
- std::span<IMLSegment*> InsertSegments(size_t index, size_t count)
- {
- segmentList2.insert(segmentList2.begin() + index, count, {});
- for (size_t i = index; i < (index + count); i++)
- segmentList2[i] = new IMLSegment();
- return { segmentList2.data() + index, count};
- }
-
- void UpdateSegmentIndices()
- {
- for (size_t i = 0; i < segmentList2.size(); i++)
- segmentList2[i]->momentaryIndex = (sint32)i;
- }
};
typedef void ATTR_MS_ABI (*PPCREC_JUMP_ENTRY)();
@@ -136,6 +359,11 @@ typedef struct
alignas(16) float _x64XMM_constFloatMin[2];
alignas(16) uint32 _x64XMM_flushDenormalMask1[4];
alignas(16) uint32 _x64XMM_flushDenormalMaskResetSignBits[4];
+ // PSQ load/store scale tables
+ double _psq_ld_scale_ps0_ps1[64 * 2];
+ double _psq_ld_scale_ps0_1[64 * 2];
+ double _psq_st_scale_ps0_ps1[64 * 2];
+ double _psq_st_scale_ps0_1[64 * 2];
// MXCSR
uint32 _x64XMM_mxCsr_ftzOn;
uint32 _x64XMM_mxCsr_ftzOff;
@@ -157,6 +385,8 @@ extern void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_unvisited)();
#define PPC_REC_INVALID_FUNCTION ((PPCRecFunction_t*)-1)
+// todo - move some of the stuff above into PPCRecompilerInternal.h
+
// recompiler interface
void PPCRecompiler_recompileIfUnvisited(uint32 enterAddress);
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h
index bfb2aed5..86af33b2 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h
+++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h
@@ -1,33 +1,293 @@
-bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set& entryAddresses, class PPCFunctionBoundaryTracker& boundaryTracker);
-IMLSegment* PPCIMLGen_CreateSplitSegmentAtEnd(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo);
-IMLSegment* PPCIMLGen_CreateNewSegmentAsBranchTarget(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo);
+#define PPCREC_CR_REG_TEMP 8 // there are only 8 cr registers (0-7) we use the 8th as temporary cr register that is never stored (BDNZ instruction for example)
-void PPCIMLGen_AssertIfNotLastSegmentInstruction(ppcImlGenContext_t& ppcImlGenContext);
+enum
+{
+ PPCREC_IML_OP_ASSIGN, // '=' operator
+ PPCREC_IML_OP_ENDIAN_SWAP, // '=' operator with 32bit endian swap
+ PPCREC_IML_OP_ADD, // '+' operator
+ PPCREC_IML_OP_SUB, // '-' operator
+ PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, // complex operation, result = operand + ~operand2 + carry bit, updates carry bit
+ PPCREC_IML_OP_COMPARE_SIGNED, // arithmetic/signed comparison operator (updates cr)
+ PPCREC_IML_OP_COMPARE_UNSIGNED, // logical/unsigned comparison operator (updates cr)
+ PPCREC_IML_OP_MULTIPLY_SIGNED, // '*' operator (signed multiply)
+ PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, // unsigned 64bit multiply, store only high 32bit-word of result
+ PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, // signed 64bit multiply, store only high 32bit-word of result
+ PPCREC_IML_OP_DIVIDE_SIGNED, // '/' operator (signed divide)
+ PPCREC_IML_OP_DIVIDE_UNSIGNED, // '/' operator (unsigned divide)
+ PPCREC_IML_OP_ADD_CARRY, // complex operation, result = operand + carry bit, updates carry bit
+ PPCREC_IML_OP_ADD_CARRY_ME, // complex operation, result = operand + carry bit + (-1), updates carry bit
+ PPCREC_IML_OP_ADD_UPDATE_CARRY, // '+' operator but also updates carry flag
+ PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, // '+' operator and also adds carry, updates carry flag
+ // assign operators with cast
+ PPCREC_IML_OP_ASSIGN_S16_TO_S32, // copy 16bit and sign extend
+ PPCREC_IML_OP_ASSIGN_S8_TO_S32, // copy 8bit and sign extend
+ // binary operation
+ PPCREC_IML_OP_OR, // '|' operator
+ PPCREC_IML_OP_ORC, // '|' operator, second operand is complemented first
+ PPCREC_IML_OP_AND, // '&' operator
+ PPCREC_IML_OP_XOR, // '^' operator
+ PPCREC_IML_OP_LEFT_ROTATE, // left rotate operator
+ PPCREC_IML_OP_LEFT_SHIFT, // shift left operator
+ PPCREC_IML_OP_RIGHT_SHIFT, // right shift operator (unsigned)
+ PPCREC_IML_OP_NOT, // complement each bit
+ PPCREC_IML_OP_NEG, // negate
+ // ppc
+ PPCREC_IML_OP_RLWIMI, // RLWIMI instruction (rotate, merge based on mask)
+ PPCREC_IML_OP_SRAW, // SRAWI/SRAW instruction (algebraic shift right, sets ca flag)
+ PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits)
+ PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits)
+ PPCREC_IML_OP_CNTLZW,
+ PPCREC_IML_OP_SUBFC, // SUBFC and SUBFIC (subtract from and set carry)
+ PPCREC_IML_OP_DCBZ, // clear 32 bytes aligned to 0x20
+ PPCREC_IML_OP_MFCR, // copy cr to gpr
+ PPCREC_IML_OP_MTCRF, // copy gpr to cr (with mask)
+ // condition register
+ PPCREC_IML_OP_CR_CLEAR, // clear cr bit
+ PPCREC_IML_OP_CR_SET, // set cr bit
+ PPCREC_IML_OP_CR_OR, // OR cr bits
+ PPCREC_IML_OP_CR_ORC, // OR cr bits, complement second input operand bit first
+ PPCREC_IML_OP_CR_AND, // AND cr bits
+ PPCREC_IML_OP_CR_ANDC, // AND cr bits, complement second input operand bit first
+ // FPU
+ PPCREC_IML_OP_FPR_ADD_BOTTOM,
+ PPCREC_IML_OP_FPR_ADD_PAIR,
+ PPCREC_IML_OP_FPR_SUB_PAIR,
+ PPCREC_IML_OP_FPR_SUB_BOTTOM,
+ PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM,
+ PPCREC_IML_OP_FPR_MULTIPLY_PAIR,
+ PPCREC_IML_OP_FPR_DIVIDE_BOTTOM,
+ PPCREC_IML_OP_FPR_DIVIDE_PAIR,
+ PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP,
+ PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP,
+ PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM,
+ PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP, // leave bottom of destination untouched
+ PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, // leave bottom of destination untouched
+ PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM, // leave top of destination untouched
+ PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED,
+ PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, // expand bottom f32 to f64 in bottom and top half
+ PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP, // calculate reciprocal with Espresso accuracy of source bottom half and write result to destination bottom and top half
+ PPCREC_IML_OP_FPR_FCMPO_BOTTOM,
+ PPCREC_IML_OP_FPR_FCMPU_BOTTOM,
+ PPCREC_IML_OP_FPR_FCMPU_TOP,
+ PPCREC_IML_OP_FPR_NEGATE_BOTTOM,
+ PPCREC_IML_OP_FPR_NEGATE_PAIR,
+ PPCREC_IML_OP_FPR_ABS_BOTTOM, // abs(fp0)
+ PPCREC_IML_OP_FPR_ABS_PAIR,
+ PPCREC_IML_OP_FPR_FRES_PAIR, // 1.0/fp approx (Espresso accuracy)
+ PPCREC_IML_OP_FPR_FRSQRTE_PAIR, // 1.0/sqrt(fp) approx (Espresso accuracy)
+ PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM, // -abs(fp0)
+ PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, // round 64bit double to 64bit double with 32bit float precision (in bottom half of xmm register)
+ PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR, // round two 64bit doubles to 64bit double with 32bit float precision
+ PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT,
+ PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ,
+ PPCREC_IML_OP_FPR_SELECT_BOTTOM, // selectively copy bottom value from operand B or C based on value in operand A
+ PPCREC_IML_OP_FPR_SELECT_PAIR, // selectively copy top/bottom from operand B or C based on value in top/bottom of operand A
+ // PS
+ PPCREC_IML_OP_FPR_SUM0,
+ PPCREC_IML_OP_FPR_SUM1,
+};
-IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext);
-void PPCRecompiler_pushBackIMLInstructions(IMLSegment* imlSegment, sint32 index, sint32 shiftBackCount);
-IMLInstruction* PPCRecompiler_insertInstruction(IMLSegment* imlSegment, sint32 index);
+#define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN)
+
+enum // PPCREC_IML_MACRO_* identifiers; presumably the 'operation' of PPCREC_IML_TYPE_MACRO instructions - TODO confirm
+{
+ PPCREC_IML_MACRO_BLR, // macro for BLR instruction code
+ PPCREC_IML_MACRO_BLRL, // macro for BLRL instruction code
+ PPCREC_IML_MACRO_BCTR, // macro for BCTR instruction code
+ PPCREC_IML_MACRO_BCTRL, // macro for BCTRL instruction code
+ PPCREC_IML_MACRO_BL, // call to different function (can be within same function)
+ PPCREC_IML_MACRO_B_FAR, // branch to different function
+ PPCREC_IML_MACRO_COUNT_CYCLES, // decrease current remaining thread cycles by a certain amount
+ PPCREC_IML_MACRO_HLE, // HLE function call
+ PPCREC_IML_MACRO_MFTB, // get TB register value (low or high)
+ PPCREC_IML_MACRO_LEAVE, // leaves recompiler and switches to interpreter
+ // debugging
+ PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak
+};
+
+enum // PPCREC_JUMP_CONDITION_* values; presumably stored in op_conditionalJump.condition - TODO confirm
+{
+ PPCREC_JUMP_CONDITION_NONE, // first enumerator (value 0); presumably marks a jump without a condition - TODO confirm
+ PPCREC_JUMP_CONDITION_E, // equal / zero
+ PPCREC_JUMP_CONDITION_NE, // not equal / not zero
+ PPCREC_JUMP_CONDITION_LE, // less or equal
+ PPCREC_JUMP_CONDITION_L, // less
+ PPCREC_JUMP_CONDITION_GE, // greater or equal
+ PPCREC_JUMP_CONDITION_G, // greater
+ // special case:
+ PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW, // needs special handling
+ PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW, // not summaryoverflow
+
+};
+
+enum // PPCREC_CR_MODE_* values; presumably stored in an instruction's crMode field - TODO confirm
+{
+ PPCREC_CR_MODE_COMPARE_SIGNED,
+ PPCREC_CR_MODE_COMPARE_UNSIGNED, // alias logic compare
+ // other modes:
+ PPCREC_CR_MODE_ARITHMETIC, // arithmetic use (for use with add/sub instructions without generating extra code)
+ PPCREC_CR_MODE_LOGICAL,
+};
+
+enum // PPCREC_IML_TYPE_* values; presumably stored in PPCRecImlInstruction_t::type - TODO confirm
+{
+ PPCREC_IML_TYPE_NONE,
+ PPCREC_IML_TYPE_NO_OP, // no-op instruction
+ PPCREC_IML_TYPE_JUMPMARK, // possible jump destination (generated before each ppc instruction)
+ PPCREC_IML_TYPE_R_R, // r* (op) r*
+ PPCREC_IML_TYPE_R_R_R, // r* = r* (op) r*
+ PPCREC_IML_TYPE_R_R_S32, // r* = r* (op) s32*
+ PPCREC_IML_TYPE_LOAD, // r* = [r*+s32*]
+ PPCREC_IML_TYPE_LOAD_INDEXED, // r* = [r*+r*]
+ PPCREC_IML_TYPE_STORE, // [r*+s32*] = r*
+ PPCREC_IML_TYPE_STORE_INDEXED, // [r*+r*] = r*
+ PPCREC_IML_TYPE_R_NAME, // r* = name
+ PPCREC_IML_TYPE_NAME_R, // name* = r*
+ PPCREC_IML_TYPE_R_S32, // r* (op) imm
+ PPCREC_IML_TYPE_MACRO,
+ PPCREC_IML_TYPE_CJUMP, // conditional jump
+ PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles >= 0
+ PPCREC_IML_TYPE_PPC_ENTER, // used to mark locations that should be written to recompilerCallTable
+ PPCREC_IML_TYPE_CR, // condition register specific operations (one or more operands)
+ // conditional
+ PPCREC_IML_TYPE_CONDITIONAL_R_S32,
+ // FPR
+ PPCREC_IML_TYPE_FPR_R_NAME, // name = f* (NOTE(review): by analogy with PPCREC_IML_TYPE_R_NAME this looks like it should read "f* = name" - confirm)
+ PPCREC_IML_TYPE_FPR_NAME_R, // f* = name (NOTE(review): by analogy with PPCREC_IML_TYPE_NAME_R this looks like it should read "name = f*" - confirm)
+ PPCREC_IML_TYPE_FPR_LOAD, // r* = (bitdepth) [r*+s32*] (single or paired single mode)
+ PPCREC_IML_TYPE_FPR_LOAD_INDEXED, // r* = (bitdepth) [r*+r*] (single or paired single mode)
+ PPCREC_IML_TYPE_FPR_STORE, // (bitdepth) [r*+s32*] = r* (single or paired single mode)
+ PPCREC_IML_TYPE_FPR_STORE_INDEXED, // (bitdepth) [r*+r*] = r* (single or paired single mode)
+ PPCREC_IML_TYPE_FPR_R_R,
+ PPCREC_IML_TYPE_FPR_R_R_R,
+ PPCREC_IML_TYPE_FPR_R_R_R_R,
+ PPCREC_IML_TYPE_FPR_R,
+ // special
+ PPCREC_IML_TYPE_MEM2MEM, // memory to memory copy (deprecated)
+
+};
+
+enum // PPCREC_NAME_* bases; the *0 entries presumably get a register index added to them - TODO confirm
+{
+ PPCREC_NAME_NONE,
+ PPCREC_NAME_TEMPORARY,
+ PPCREC_NAME_R0 = 1000,
+ PPCREC_NAME_SPR0 = 2000,
+ PPCREC_NAME_FPR0 = 3000,
+ PPCREC_NAME_TEMPORARY_FPR0 = 4000, // 0 to 7
+ //PPCREC_NAME_CR0 = 3000, // value mapped condition register (usually it isn't needed and can be optimized away); disabled - its value 3000 is the same as PPCREC_NAME_FPR0
+};
+
+// special cases for LOAD/STORE (marker values; load and store markers reuse the same numbers)
+#define PPC_REC_LOAD_LWARX_MARKER (100) // lwarx instruction (similar to LWZX but sets reserved address/value)
+#define PPC_REC_STORE_STWCX_MARKER (100) // stwcx instruction (similar to STWX but writes only if reservation from LWARX is valid)
+#define PPC_REC_STORE_STSWI_1 (200) // stswi nb = 1
+#define PPC_REC_STORE_STSWI_2 (201) // stswi nb = 2
+#define PPC_REC_STORE_STSWI_3 (202) // stswi nb = 3
+#define PPC_REC_STORE_LSWI_1 (200) // lswi nb = 1 (NOTE(review): name says STORE although lswi is a load; same value as PPC_REC_STORE_STSWI_1)
+#define PPC_REC_STORE_LSWI_2 (201) // lswi nb = 2 (same value as PPC_REC_STORE_STSWI_2)
+#define PPC_REC_STORE_LSWI_3 (202) // lswi nb = 3 (same value as PPC_REC_STORE_STSWI_3)
+
+#define PPC_REC_INVALID_REGISTER 0xFF
+
+#define PPCREC_CR_BIT_LT 0
+#define PPCREC_CR_BIT_GT 1
+#define PPCREC_CR_BIT_EQ 2
+#define PPCREC_CR_BIT_SO 3
+
+enum
+{
+ // fpr load
+ PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0,
+ PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1,
+ PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0,
+ PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0,
+ PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1,
+ PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0,
+ PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1,
+ PPCREC_FPR_LD_MODE_PSQ_S16_PS0,
+ PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1,
+ PPCREC_FPR_LD_MODE_PSQ_U16_PS0,
+ PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1,
+ PPCREC_FPR_LD_MODE_PSQ_S8_PS0,
+ PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1,
+ PPCREC_FPR_LD_MODE_PSQ_U8_PS0,
+ PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1,
+ // fpr store
+ PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, // store 1 single precision float from ps0
+ PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, // store 1 double precision float from ps0
+
+ PPCREC_FPR_ST_MODE_UI32_FROM_PS0, // store raw low-32bit of PS0
+
+ PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1,
+ PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0,
+ PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1,
+ PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0,
+ PPCREC_FPR_ST_MODE_PSQ_S8_PS0,
+ PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1,
+ PPCREC_FPR_ST_MODE_PSQ_U8_PS0,
+ PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1,
+ PPCREC_FPR_ST_MODE_PSQ_U16_PS0,
+ PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1,
+ PPCREC_FPR_ST_MODE_PSQ_S16_PS0,
+ PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1,
+};
+
+bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set<uint32>& entryAddresses); // entryAddresses: set of uint32 PPC addresses (template argument restored; it had been stripped from this listing)
+void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext); // todo - move to destructor
+
+PPCRecImlInstruction_t* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext);
+void PPCRecompiler_pushBackIMLInstructions(PPCRecImlSegment_t* imlSegment, sint32 index, sint32 shiftBackCount);
+PPCRecImlInstruction_t* PPCRecompiler_insertInstruction(PPCRecImlSegment_t* imlSegment, sint32 index);
void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint32 index, sint32 count);
-void PPCRecompilerIml_setSegmentPoint(IMLSegmentPoint* segmentPoint, IMLSegment* imlSegment, sint32 index);
-void PPCRecompilerIml_removeSegmentPoint(IMLSegmentPoint* segmentPoint);
+void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint, PPCRecImlSegment_t* imlSegment, sint32 index);
+void PPCRecompilerIml_removeSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint);
-// Register management
-IMLReg PPCRecompilerImlGen_LookupReg(ppcImlGenContext_t* ppcImlGenContext, IMLName mappedName, IMLRegFormat regFormat);
+// GPR register management
+uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false);
+uint32 PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);
-IMLReg PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);
+// FPR register management
+uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false);
+uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);
// IML instruction generation
-void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, IMLReg registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet);
+void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 jumpmarkAddress);
+void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction);
+
+void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 copyWidth, bool signExtend, bool bigEndian, uint8 crRegister, uint32 crMode);
+void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet);
+void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0);
+
+
+
+// IML instruction generation (new style, can generate new instructions but also overwrite existing ones)
+
+void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction);
+void PPCRecompilerImlGen_generateNewInstruction_memory_memory(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint8 srcMemReg, sint32 srcImmS32, uint8 dstMemReg, sint32 dstImmS32, uint8 copyWidth);
+
+void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister = PPC_REC_INVALID_REGISTER);
// IML generation - FPU
-bool PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble);
-bool PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble);
-bool PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble);
-bool PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool hasUpdate, bool isDouble);
+bool PPCRecompilerImlGen_LFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_LFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_LFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_LFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_LFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_LFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_STFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_STFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_STFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_STFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFIWX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_STFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_STFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FMUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
@@ -53,17 +313,22 @@ bool PPCRecompilerImlGen_FNEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
bool PPCRecompilerImlGen_FSEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FCTIWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
-bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate);
-bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate);
-bool PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1);
-bool PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1);
+bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_SUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_DIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
-bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withNegative);
+bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
+bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
@@ -82,20 +347,76 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o
// IML general
+bool PPCRecompiler_isSuffixInstruction(PPCRecImlInstruction_t* iml);
+void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext);
+void PPCRecompilerIml_setLinkBranchNotTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst);
+void PPCRecompilerIml_setLinkBranchTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst);
+void PPCRecompilerIML_relinkInputSegment(PPCRecImlSegment_t* imlSegmentOrig, PPCRecImlSegment_t* imlSegmentNew);
+void PPCRecompilerIML_removeLink(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst);
void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext);
-void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function& genSegmentBranchTaken, const std::function& genSegmentBranchNotTaken);
-void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function& genSegmentBranchNotTaken); // no else segment
-void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count, sint32 defaultCaseIndex);
+PPCRecImlInstruction_t* PPCRecompilerIML_getLastInstruction(PPCRecImlSegment_t* imlSegment);
-class IMLRedirectInstOutput
+// IML analyzer
+typedef struct
{
-public:
- IMLRedirectInstOutput(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* outputSegment);
- ~IMLRedirectInstOutput();
+ uint32 readCRBits;
+ uint32 writtenCRBits;
+}PPCRecCRTracking_t;
+
+bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment);
+bool PPCRecompilerImlAnalyzer_canTypeWriteCR(PPCRecImlInstruction_t* imlInstruction);
+void PPCRecompilerImlAnalyzer_getCRTracking(PPCRecImlInstruction_t* imlInstruction, PPCRecCRTracking_t* crTracking);
+
+// IML optimizer
+bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext);
+
+bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext);
+
+void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext);
+void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext);
+void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext);
+
+void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext);
+
+// IML register allocator
+void PPCRecompilerImm_allocateRegisters(ppcImlGenContext_t* ppcImlGenContext);
+
+// late optimizations
+void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext);
+
+// debug
+
+void PPCRecompiler_dumpIMLSegment(PPCRecImlSegment_t* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false);
-private:
- ppcImlGenContext_t* m_context;
- IMLSegment* m_prevSegment;
-};
\ No newline at end of file
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ sint16 readNamedReg1;
+ sint16 readNamedReg2;
+ sint16 readNamedReg3;
+ sint16 writtenNamedReg1;
+ };
+ sint16 gpr[4]; // 3 read + 1 write (array view of the four named-register fields above)
+ };
+ // FPR
+ union
+ {
+ struct
+ {
+ // note: If destination operand is not fully written, it will be added as a read FPR as well
+ sint16 readFPR1;
+ sint16 readFPR2;
+ sint16 readFPR3;
+ sint16 readFPR4; // usually this is set to the result FPR if only partially overwritten
+ sint16 writtenFPR1;
+ };
+ sint16 fpr[4]; // NOTE(review): overlays readFPR1..readFPR4 only, writtenFPR1 is not part of this array - confirm this is intended
+ };
+}PPCImlOptimizerUsedRegisters_t;
+
+void PPCRecompiler_checkRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, PPCImlOptimizerUsedRegisters_t* registersUsed);
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp
new file mode 100644
index 00000000..4962d30d
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp
@@ -0,0 +1,137 @@
+#include "PPCRecompiler.h"
+#include "PPCRecompilerIml.h"
+#include "util/helpers/fixedSizeList.h"
+#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
+
+/*
+ * Returns true if the segment is a tight loop (its taken branch leads back to the segment itself)
+ * which is assumed to be finite. This is a heuristic, see the pattern checks below
+ */
+bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment)
+{
+	// base criteria, must jump to beginning of same segment
+	if (imlSegment->nextSegmentBranchTaken != imlSegment)
+		return false;
+	// loops using BDNZ are assumed to always be finite
+	for (sint32 t = 0; t < imlSegment->imlListCount; t++)
+	{
+		if (imlSegment->imlList[t].type == PPCREC_IML_TYPE_R_S32 && imlSegment->imlList[t].operation == PPCREC_IML_OP_SUB && imlSegment->imlList[t].crRegister == 8)
+		{
+			return true;
+		}
+	}
+	// for non-BDNZ loops, check for common patterns
+	// risky approach, look for ADD/SUB operations and assume that potential overflow means finite (does not include r_r_s32 ADD/SUB)
+	// this catches most loops with load-update and store-update instructions, but also those with decrementing counters
+	FixedSizeList<sint16, 64, true> list_modifiedRegisters;
+	for (sint32 t = 0; t < imlSegment->imlListCount; t++)
+	{
+		if (imlSegment->imlList[t].type == PPCREC_IML_TYPE_R_S32 && (imlSegment->imlList[t].operation == PPCREC_IML_OP_ADD || imlSegment->imlList[t].operation == PPCREC_IML_OP_SUB) )
+		{
+			list_modifiedRegisters.addUnique(imlSegment->imlList[t].op_r_immS32.registerIndex);
+		}
+	}
+	if (list_modifiedRegisters.count > 0)
+	{
+		// remove all registers from the list that are modified by non-ADD/SUB instructions
+		// todo: We should also cover the case where ADD+SUB on the same register cancel the effect out
+		PPCImlOptimizerUsedRegisters_t registersUsed;
+		for (sint32 t = 0; t < imlSegment->imlListCount; t++)
+		{
+			if (imlSegment->imlList[t].type == PPCREC_IML_TYPE_R_S32 && (imlSegment->imlList[t].operation == PPCREC_IML_OP_ADD || imlSegment->imlList[t].operation == PPCREC_IML_OP_SUB))
+				continue;
+			PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + t, &registersUsed);
+			if(registersUsed.writtenNamedReg1 < 0)
+				continue;
+			list_modifiedRegisters.remove(registersUsed.writtenNamedReg1);
+		}
+		// any register that is only ever changed via ADD/SUB counts as a loop counter
+		if (list_modifiedRegisters.count > 0)
+		{
+			return true;
+		}
+	}
+	return false;
+}
+
+/*
+* Reports whether an instruction of this IML type is able to overwrite CR
+* (whether it actually does depends on the value of ->crRegister)
+*/
+bool PPCRecompilerImlAnalyzer_canTypeWriteCR(PPCRecImlInstruction_t* imlInstruction)
+{
+	switch (imlInstruction->type)
+	{
+	case PPCREC_IML_TYPE_R_R:
+	case PPCREC_IML_TYPE_R_R_R:
+	case PPCREC_IML_TYPE_R_R_S32:
+	case PPCREC_IML_TYPE_R_S32:
+	case PPCREC_IML_TYPE_FPR_R_R:
+	case PPCREC_IML_TYPE_FPR_R_R_R:
+	case PPCREC_IML_TYPE_FPR_R_R_R_R:
+	case PPCREC_IML_TYPE_FPR_R:
+		// register-operand forms (integer and FPR)
+		return true;
+	default:
+		// all remaining types are reported as not CR-capable
+		return false;
+	}
+}
+
+// Determines which CR bits the given IML instruction reads and which it writes
+// Bits are indexed as (crRegisterIndex * 4 + crBitIndex); both masks in crTracking are reset first
+void PPCRecompilerImlAnalyzer_getCRTracking(PPCRecImlInstruction_t* imlInstruction, PPCRecCRTracking_t* crTracking)
+{
+	crTracking->readCRBits = 0;
+	crTracking->writtenCRBits = 0;
+	if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP)
+	{
+		if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE)
+		{
+			// masks are built from unsigned literals so that bit 31 never shifts into the sign bit of an int
+			uint32 crBitFlag = 1u << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + imlInstruction->op_conditionalJump.crBitIndex);
+			crTracking->readCRBits = (crBitFlag);
+		}
+	}
+	else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
+	{
+		uint32 crBitFlag = 1u << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex);
+		crTracking->readCRBits = crBitFlag;
+	}
+	else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR)
+	{
+		// MFCR reads the entire condition register
+		crTracking->readCRBits = 0xFFFFFFFF;
+	}
+	else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF)
+	{
+		crTracking->writtenCRBits |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32);
+	}
+	else if (imlInstruction->type == PPCREC_IML_TYPE_CR)
+	{
+		if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR ||
+			imlInstruction->operation == PPCREC_IML_OP_CR_SET)
+		{
+			// single-bit operations only write crD
+			uint32 crBitFlag = 1u << (imlInstruction->op_cr.crD);
+			crTracking->writtenCRBits = crBitFlag;
+		}
+		else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR ||
+			imlInstruction->operation == PPCREC_IML_OP_CR_ORC ||
+			imlInstruction->operation == PPCREC_IML_OP_CR_AND ||
+			imlInstruction->operation == PPCREC_IML_OP_CR_ANDC)
+		{
+			// three-bit operations write crD and read crA and crB
+			uint32 crBitFlag = 1u << (imlInstruction->op_cr.crD);
+			crTracking->writtenCRBits = crBitFlag;
+			crBitFlag = 1u << (imlInstruction->op_cr.crA);
+			crTracking->readCRBits = crBitFlag;
+			crBitFlag = 1u << (imlInstruction->op_cr.crB);
+			crTracking->readCRBits |= crBitFlag;
+		}
+		else
+			assert_dbg();
+	}
+	else if (PPCRecompilerImlAnalyzer_canTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7)
+	{
+		// the instruction updates all four bits of the selected CR field
+		crTracking->writtenCRBits |= (0xFu << (imlInstruction->crRegister * 4));
+	}
+	else if ((imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER)
+	{
+		// overwrites CR0
+		crTracking->writtenCRBits |= (0xFu << 0);
+	}
+}
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp
index e76a53fa..b9685488 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp
+++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp
@@ -1,345 +1,563 @@
#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterHelper.h"
-#include "Cafe/HW/Espresso/EspressoISA.h"
#include "PPCRecompiler.h"
#include "PPCRecompilerIml.h"
-#include "IML/IML.h"
-#include "IML/IMLRegisterAllocatorRanges.h"
-#include "PPCFunctionBoundaryTracker.h"
-#include "Cafe/OS/libs/coreinit/coreinit_Time.h"
+#include "PPCRecompilerX64.h"
+#include "PPCRecompilerImlRanges.h"
+#include "util/helpers/StringBuf.h"
bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext);
+uint32 PPCRecompiler_iterateCurrentInstruction(ppcImlGenContext_t* ppcImlGenContext);
+uint32 PPCRecompiler_getInstructionByOffset(ppcImlGenContext_t* ppcImlGenContext, uint32 offset);
-struct PPCBasicBlockInfo
+PPCRecImlInstruction_t* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext)
{
- PPCBasicBlockInfo(uint32 startAddress, const std::set& entryAddresses) : startAddress(startAddress), lastAddress(startAddress)
+ if( ppcImlGenContext->imlListCount+1 > ppcImlGenContext->imlListSize )
{
- isEnterable = entryAddresses.find(startAddress) != entryAddresses.end();
+ sint32 newSize = ppcImlGenContext->imlListCount*2 + 2;
+ ppcImlGenContext->imlList = (PPCRecImlInstruction_t*)realloc(ppcImlGenContext->imlList, sizeof(PPCRecImlInstruction_t)*newSize);
+ ppcImlGenContext->imlListSize = newSize;
}
-
- uint32 startAddress;
- uint32 lastAddress; // inclusive
- bool isEnterable{ false };
- bool hasContinuedFlow{ true }; // non-branch path goes to next segment, assumed by default
- bool hasBranchTarget{ false };
- uint32 branchTarget{};
-
- // associated IML segments
- IMLSegment* firstSegment{}; // first segment in chain, used as branch target for other segments
- IMLSegment* appendSegment{}; // last segment in chain, additional instructions should be appended to this segment
-
- void SetInitialSegment(IMLSegment* seg)
- {
- cemu_assert_debug(!firstSegment);
- cemu_assert_debug(!appendSegment);
- firstSegment = seg;
- appendSegment = seg;
- }
-
- IMLSegment* GetFirstSegmentInChain()
- {
- return firstSegment;
- }
-
- IMLSegment* GetSegmentForInstructionAppend()
- {
- return appendSegment;
- }
-};
-
-IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext)
-{
- IMLInstruction& inst = ppcImlGenContext->currentOutputSegment->imlList.emplace_back();
- memset(&inst, 0x00, sizeof(IMLInstruction));
- return &inst;
+ PPCRecImlInstruction_t* imlInstruction = ppcImlGenContext->imlList+ppcImlGenContext->imlListCount;
+ memset(imlInstruction, 0x00, sizeof(PPCRecImlInstruction_t));
+ imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; // dont update any cr register by default
+ imlInstruction->associatedPPCAddress = ppcImlGenContext->ppcAddressOfCurrentInstruction;
+ ppcImlGenContext->imlListCount++;
+ return imlInstruction;
}
-void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian)
+void PPCRecompilerImlGen_generateNewInstruction_jumpmark(ppcImlGenContext_t* ppcImlGenContext, uint32 address)
{
- cemu_assert_debug(registerMemory1.IsValid());
- cemu_assert_debug(registerMemory2.IsValid());
- cemu_assert_debug(registerDestination.IsValid());
- IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
+ // no-op that indicates possible destination of a jump
+ PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
+ imlInstruction->type = PPCREC_IML_TYPE_JUMPMARK;
+ imlInstruction->op_jumpmark.address = address;
+}
+
+void PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext_t* ppcImlGenContext, uint32 macroId, uint32 param, uint32 param2, uint16 paramU16)
+{
+ // appends a MACRO instruction; macroId is stored as the operation and the three params are stored unmodified
+ PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
+ imlInstruction->type = PPCREC_IML_TYPE_MACRO;
+ imlInstruction->operation = macroId;
+ imlInstruction->op_macro.param = param;
+ imlInstruction->op_macro.param2 = param2;
+ imlInstruction->op_macro.paramU16 = paramU16;
+}
+
+/*
+ * Generates a marker for Interpreter -> Recompiler entrypoints
+ * PPC_ENTER iml instructions have no associated PPC address but the instruction itself has one
+ */
+void PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcAddress)
+{
+ // entry marker: the address is kept in op_ppcEnter.ppcAddress, x64Offset starts at 0 and associatedPPCAddress is cleared
+ PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
+ imlInstruction->type = PPCREC_IML_TYPE_PPC_ENTER;
+ imlInstruction->operation = 0;
+ imlInstruction->op_ppcEnter.ppcAddress = ppcAddress;
+ imlInstruction->op_ppcEnter.x64Offset = 0;
+ imlInstruction->associatedPPCAddress = 0;
+}
+
+void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister, uint8 crMode)
+{
+ // operation with two register operands (e.g. "t0 = t1")
+ if(imlInstruction == NULL)
+ imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
+ imlInstruction->type = PPCREC_IML_TYPE_R_R;
+ imlInstruction->operation = operation;
+ imlInstruction->crRegister = crRegister;
+ imlInstruction->crMode = crMode;
+ imlInstruction->op_r_r.registerResult = registerResult;
+ imlInstruction->op_r_r.registerA = registerA;
+}
+
+void PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 crRegister=PPC_REC_INVALID_REGISTER, uint8 crMode=0)
+{
+ // operation with three register operands (e.g. "t0 = t1 + t4")
+ PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
+ imlInstruction->type = PPCREC_IML_TYPE_R_R_R;
+ imlInstruction->operation = operation;
+ imlInstruction->crRegister = crRegister;
+ imlInstruction->crMode = crMode;
+ imlInstruction->op_r_r_r.registerResult = registerResult;
+ imlInstruction->op_r_r_r.registerA = registerA;
+ imlInstruction->op_r_r_r.registerB = registerB;
+}
+
+void PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32, uint8 crRegister=PPC_REC_INVALID_REGISTER, uint8 crMode=0)
+{
+ // operation with two register operands and one signed immediate (e.g. "t0 = t1 + 1234")
+ PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
+ imlInstruction->type = PPCREC_IML_TYPE_R_R_S32;
+ imlInstruction->operation = operation;
+ imlInstruction->crRegister = crRegister;
+ imlInstruction->crMode = crMode;
+ imlInstruction->op_r_r_s32.registerResult = registerResult;
+ imlInstruction->op_r_r_s32.registerA = registerA;
+ imlInstruction->op_r_r_s32.immS32 = immS32;
+}
+
+void PPCRecompilerImlGen_generateNewInstruction_name_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, uint32 name, uint32 copyWidth, bool signExtend, bool bigEndian)
+{
+ // Store name (e.g. "'r3' = t0" which translates to MOV [ESP+offset_r3], reg32)
+ PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
+ imlInstruction->type = PPCREC_IML_TYPE_NAME_R;
+ imlInstruction->operation = operation;
+ imlInstruction->op_r_name.registerIndex = registerIndex;
+ imlInstruction->op_r_name.name = name;
+ imlInstruction->op_r_name.copyWidth = copyWidth;
+ imlInstruction->op_r_name.flags = (signExtend?PPCREC_IML_OP_FLAG_SIGNEXTEND:0)|(bigEndian?PPCREC_IML_OP_FLAG_SWITCHENDIAN:0);
+}
+
+void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 copyWidth, bool signExtend, bool bigEndian, uint8 crRegister, uint32 crMode)
+{
+ // two variations:
+ // operation without store (e.g. "'r3' < 123" which has no effect other than updating a condition flags register)
+ // operation with store (e.g. "'r3' = 123")
+ PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
+ imlInstruction->type = PPCREC_IML_TYPE_R_S32;
+ imlInstruction->operation = operation;
+ imlInstruction->crRegister = crRegister;
+ imlInstruction->crMode = crMode;
+ imlInstruction->op_r_immS32.registerIndex = registerIndex;
+ imlInstruction->op_r_immS32.immS32 = immS32;
+}
+
+void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet)
+{
+ if(imlInstruction == NULL)
+ imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
+ else
+ memset(imlInstruction, 0, sizeof(PPCRecImlInstruction_t));
+ imlInstruction->type = PPCREC_IML_TYPE_CONDITIONAL_R_S32;
+ imlInstruction->operation = operation;
+ imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
+ // r_s32 operation
+ imlInstruction->op_conditional_r_s32.registerIndex = registerIndex;
+ imlInstruction->op_conditional_r_s32.immS32 = immS32;
+ // condition
+ imlInstruction->op_conditional_r_s32.crRegisterIndex = crRegisterIndex;
+ imlInstruction->op_conditional_r_s32.crBitIndex = crBitIndex;
+ imlInstruction->op_conditional_r_s32.bitMustBeSet = bitMustBeSet;
+}
+
+
+void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 jumpmarkAddress)
+{
+ // jump
+ if (imlInstruction == NULL)
+ imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
+ else
+ memset(imlInstruction, 0, sizeof(PPCRecImlInstruction_t));
+ imlInstruction->type = PPCREC_IML_TYPE_CJUMP;
+ imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
+ imlInstruction->op_conditionalJump.jumpmarkAddress = jumpmarkAddress;
+ imlInstruction->op_conditionalJump.jumpAccordingToSegment = false;
+ imlInstruction->op_conditionalJump.condition = PPCREC_JUMP_CONDITION_NONE;
+ imlInstruction->op_conditionalJump.crRegisterIndex = 0;
+ imlInstruction->op_conditionalJump.crBitIndex = 0;
+ imlInstruction->op_conditionalJump.bitMustBeSet = false;
+}
+
+// jump based on segment branches
+void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction)
+{
+ // jump
+ if (imlInstruction == NULL)
+ imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
+ imlInstruction->associatedPPCAddress = 0;
+ imlInstruction->type = PPCREC_IML_TYPE_CJUMP;
+ imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
+ imlInstruction->op_conditionalJump.jumpmarkAddress = 0;
+ imlInstruction->op_conditionalJump.jumpAccordingToSegment = true;
+ imlInstruction->op_conditionalJump.condition = PPCREC_JUMP_CONDITION_NONE;
+ imlInstruction->op_conditionalJump.crRegisterIndex = 0;
+ imlInstruction->op_conditionalJump.crBitIndex = 0;
+ imlInstruction->op_conditionalJump.bitMustBeSet = false;
+}
+
+void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction)
+{
+ if (imlInstruction == NULL)
+ imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
+ imlInstruction->type = PPCREC_IML_TYPE_NO_OP;
+ imlInstruction->operation = 0;
+ imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
+ imlInstruction->crMode = 0;
+}
+
+void PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 crD, uint8 crA, uint8 crB)
+{
+ // multiple variations:
+ // operation involving only one cr bit (like clear crD bit)
+ // operation involving three cr bits (like crD = crA or crB)
+ PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
+ imlInstruction->type = PPCREC_IML_TYPE_CR;
+ imlInstruction->operation = operation;
+ imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
+ imlInstruction->crMode = 0;
+ imlInstruction->op_cr.crD = crD;
+ imlInstruction->op_cr.crA = crA;
+ imlInstruction->op_cr.crB = crB;
+}
+
+void PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpmarkAddress, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet)
+{
+ // conditional jump
+ PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
+ imlInstruction->type = PPCREC_IML_TYPE_CJUMP;
+ imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
+ imlInstruction->op_conditionalJump.jumpmarkAddress = jumpmarkAddress;
+ imlInstruction->op_conditionalJump.condition = jumpCondition;
+ imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex;
+ imlInstruction->op_conditionalJump.crBitIndex = crBitIndex;
+ imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet;
+}
+
+void PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian)
+{
+ // load from memory
+ PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
+ imlInstruction->type = PPCREC_IML_TYPE_LOAD;
+ imlInstruction->operation = 0;
+ imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
+ imlInstruction->op_storeLoad.registerData = registerDestination;
+ imlInstruction->op_storeLoad.registerMem = registerMemory;
+ imlInstruction->op_storeLoad.immS32 = immS32;
+ imlInstruction->op_storeLoad.copyWidth = copyWidth;
+ //imlInstruction->op_storeLoad.flags = (signExtend ? PPCREC_IML_OP_FLAG_SIGNEXTEND : 0) | (switchEndian ? PPCREC_IML_OP_FLAG_SWITCHENDIAN : 0);
+ imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian;
+ imlInstruction->op_storeLoad.flags2.signExtend = signExtend;
+}
+
+void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian)
+{
+ // load from memory
+ PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
imlInstruction->type = PPCREC_IML_TYPE_LOAD_INDEXED;
imlInstruction->operation = 0;
+ imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
imlInstruction->op_storeLoad.registerData = registerDestination;
imlInstruction->op_storeLoad.registerMem = registerMemory1;
imlInstruction->op_storeLoad.registerMem2 = registerMemory2;
imlInstruction->op_storeLoad.copyWidth = copyWidth;
+ //imlInstruction->op_storeLoad.flags = (signExtend?PPCREC_IML_OP_FLAG_SIGNEXTEND:0)|(switchEndian?PPCREC_IML_OP_FLAG_SWITCHENDIAN:0);
imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian;
imlInstruction->op_storeLoad.flags2.signExtend = signExtend;
}
-void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian)
+void PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext_t* ppcImlGenContext, uint8 registerSource, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool switchEndian)
{
- cemu_assert_debug(registerMemory1.IsValid());
- cemu_assert_debug(registerMemory2.IsValid());
- cemu_assert_debug(registerDestination.IsValid());
- IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
+ // store to memory
+ PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
+ imlInstruction->type = PPCREC_IML_TYPE_STORE;
+ imlInstruction->operation = 0;
+ imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
+ imlInstruction->op_storeLoad.registerData = registerSource;
+ imlInstruction->op_storeLoad.registerMem = registerMemory;
+ imlInstruction->op_storeLoad.immS32 = immS32;
+ imlInstruction->op_storeLoad.copyWidth = copyWidth;
+ //imlInstruction->op_storeLoad.flags = (switchEndian?PPCREC_IML_OP_FLAG_SWITCHENDIAN:0);
+ imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian;
+ imlInstruction->op_storeLoad.flags2.signExtend = false;
+}
+
+void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian)
+{
+ // store to memory (indexed)
+ PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
imlInstruction->type = PPCREC_IML_TYPE_STORE_INDEXED;
imlInstruction->operation = 0;
+ imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
imlInstruction->op_storeLoad.registerData = registerDestination;
imlInstruction->op_storeLoad.registerMem = registerMemory1;
imlInstruction->op_storeLoad.registerMem2 = registerMemory2;
imlInstruction->op_storeLoad.copyWidth = copyWidth;
+ //imlInstruction->op_storeLoad.flags = (signExtend?PPCREC_IML_OP_FLAG_SIGNEXTEND:0)|(switchEndian?PPCREC_IML_OP_FLAG_SWITCHENDIAN:0);
imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian;
imlInstruction->op_storeLoad.flags2.signExtend = signExtend;
}
-// create and fill two segments (branch taken and branch not taken) as a follow up to the current segment and then merge flow afterwards
-void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function& genSegmentBranchTaken, const std::function& genSegmentBranchNotTaken)
+void PPCRecompilerImlGen_generateNewInstruction_memory_memory(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint8 srcMemReg, sint32 srcImmS32, uint8 dstMemReg, sint32 dstImmS32, uint8 copyWidth)
{
- IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend();
-
- std::span segments = ppcImlGenContext.InsertSegments(ppcImlGenContext.GetSegmentIndex(currentWriteSegment) + 1, 3);
- IMLSegment* segBranchNotTaken = segments[0];
- IMLSegment* segBranchTaken = segments[1];
- IMLSegment* segMerge = segments[2];
-
- // link the segments
- segMerge->SetLinkBranchTaken(currentWriteSegment->GetBranchTaken());
- segMerge->SetLinkBranchNotTaken(currentWriteSegment->GetBranchNotTaken());
- currentWriteSegment->SetLinkBranchTaken(segBranchTaken);
- currentWriteSegment->SetLinkBranchNotTaken(segBranchNotTaken);
- segBranchTaken->SetLinkBranchNotTaken(segMerge);
- segBranchNotTaken->SetLinkBranchTaken(segMerge);
- // generate code for branch taken segment
- ppcImlGenContext.currentOutputSegment = segBranchTaken;
- genSegmentBranchTaken(ppcImlGenContext);
- cemu_assert_debug(ppcImlGenContext.currentOutputSegment == segBranchTaken);
- // generate code for branch not taken segment
- ppcImlGenContext.currentOutputSegment = segBranchNotTaken;
- genSegmentBranchNotTaken(ppcImlGenContext);
- cemu_assert_debug(ppcImlGenContext.currentOutputSegment == segBranchNotTaken);
- ppcImlGenContext.emitInst().make_jump();
- // make merge segment the new write segment
- ppcImlGenContext.currentOutputSegment = segMerge;
- basicBlockInfo.appendSegment = segMerge;
+ // copy from memory to memory
+ if(imlInstruction == NULL)
+ imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
+ imlInstruction->type = PPCREC_IML_TYPE_MEM2MEM;
+ imlInstruction->operation = 0;
+ imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
+ imlInstruction->op_mem2mem.src.registerMem = srcMemReg;
+ imlInstruction->op_mem2mem.src.immS32 = srcImmS32;
+ imlInstruction->op_mem2mem.dst.registerMem = dstMemReg;
+ imlInstruction->op_mem2mem.dst.immS32 = dstImmS32;
+ imlInstruction->op_mem2mem.copyWidth = copyWidth;
}
-void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function& genSegmentBranchNotTaken)
+uint32 PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName)
{
- IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend();
-
- std::span segments = ppcImlGenContext.InsertSegments(ppcImlGenContext.GetSegmentIndex(currentWriteSegment) + 1, 2);
- IMLSegment* segBranchNotTaken = segments[0];
- IMLSegment* segMerge = segments[1];
-
- // link the segments
- segMerge->SetLinkBranchTaken(currentWriteSegment->GetBranchTaken());
- segMerge->SetLinkBranchNotTaken(currentWriteSegment->GetBranchNotTaken());
- currentWriteSegment->SetLinkBranchTaken(segMerge);
- currentWriteSegment->SetLinkBranchNotTaken(segBranchNotTaken);
- segBranchNotTaken->SetLinkBranchNotTaken(segMerge);
- // generate code for branch not taken segment
- ppcImlGenContext.currentOutputSegment = segBranchNotTaken;
- genSegmentBranchNotTaken(ppcImlGenContext);
- cemu_assert_debug(ppcImlGenContext.currentOutputSegment == segBranchNotTaken);
- // make merge segment the new write segment
- ppcImlGenContext.currentOutputSegment = segMerge;
- basicBlockInfo.appendSegment = segMerge;
-}
-
-IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index);
-
-IMLRedirectInstOutput::IMLRedirectInstOutput(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* outputSegment) : m_context(ppcImlGenContext)
-{
- m_prevSegment = ppcImlGenContext->currentOutputSegment;
- cemu_assert_debug(ppcImlGenContext->currentOutputSegment == ppcImlGenContext->currentBasicBlock->appendSegment);
- if (outputSegment == ppcImlGenContext->currentOutputSegment)
+ if( mappedName == PPCREC_NAME_NONE )
{
- m_prevSegment = nullptr;
- return;
+ debug_printf("PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(): Invalid mappedName parameter\n");
+ return PPC_REC_INVALID_REGISTER;
}
- m_context->currentBasicBlock->appendSegment = outputSegment;
- m_context->currentOutputSegment = outputSegment;
-}
-
-IMLRedirectInstOutput::~IMLRedirectInstOutput()
-{
- if (m_prevSegment)
+ for(uint32 i=0; i<(PPC_REC_MAX_VIRTUAL_GPR-1); i++)
{
- m_context->currentBasicBlock->appendSegment = m_prevSegment;
- m_context->currentOutputSegment = m_prevSegment;
- }
-}
-
-// compare values and branch to segment with same index in segmentsOut. The last segment doesn't actually have any comparison and just is the default case. Thus compareValues is one shorter than count
-void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count, sint32 defaultCaseIndex)
-{
- IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend();
- cemu_assert_debug(!currentWriteSegment->HasSuffixInstruction()); // must not already have a suffix instruction
-
- const sint32 numBranchSegments = count + 1;
- const sint32 numCaseSegments = count;
-
- std::span segments = ppcImlGenContext.InsertSegments(ppcImlGenContext.GetSegmentIndex(currentWriteSegment) + 1, numBranchSegments - 1 + numCaseSegments + 1);
- IMLSegment** extraBranchSegments = segments.data();
- IMLSegment** caseSegments = segments.data() + numBranchSegments - 1;
- IMLSegment* mergeSegment = segments[numBranchSegments - 1 + numCaseSegments];
-
- // move links to the merge segment
- mergeSegment->SetLinkBranchTaken(currentWriteSegment->GetBranchTaken());
- mergeSegment->SetLinkBranchNotTaken(currentWriteSegment->GetBranchNotTaken());
- currentWriteSegment->SetLinkBranchTaken(nullptr);
- currentWriteSegment->SetLinkBranchNotTaken(nullptr);
-
- for (sint32 i=0; imappedRegister[i] == PPCREC_NAME_NONE )
{
- cemu_assert_debug(i < numCaseSegments);
- seg->SetLinkBranchTaken(caseSegments[i]);
- seg->SetLinkBranchNotTaken(GetBranchSegment(i + 1));
- seg->AppendInstruction()->make_compare_s32(compareReg, compareValues[i], tmpBoolReg, IMLCondition::EQ);
- seg->AppendInstruction()->make_conditional_jump(tmpBoolReg, true);
- }
- else
- {
- cemu_assert_debug(defaultCaseIndex < numCaseSegments);
- seg->SetLinkBranchTaken(caseSegments[defaultCaseIndex]);
- seg->AppendInstruction()->make_jump();
+ ppcImlGenContext->mappedRegister[i] = mappedName;
+ return i;
}
}
- // link case segments
- for (sint32 i=0; iSetLinkBranchTaken(mergeSegment);
- // -> Jumps are added after the instructions
- }
- else
- {
- seg->SetLinkBranchTaken(mergeSegment);
- }
- }
- ppcImlGenContext.currentOutputSegment = mergeSegment;
- basicBlockInfo.appendSegment = mergeSegment;
-}
-
-IMLReg PPCRecompilerImlGen_LookupReg(ppcImlGenContext_t* ppcImlGenContext, IMLName mappedName, IMLRegFormat regFormat)
-{
- auto it = ppcImlGenContext->mappedRegs.find(mappedName);
- if (it != ppcImlGenContext->mappedRegs.end())
- return it->second;
- // create new reg entry
- IMLRegFormat baseFormat;
- if (regFormat == IMLRegFormat::F64)
- baseFormat = IMLRegFormat::F64;
- else if (regFormat == IMLRegFormat::I32)
- baseFormat = IMLRegFormat::I64;
- else
- {
- cemu_assert_suspicious();
- }
- IMLRegID newRegId = ppcImlGenContext->mappedRegs.size();
- IMLReg newReg(baseFormat, regFormat, 0, newRegId);
- ppcImlGenContext->mappedRegs.try_emplace(mappedName, newReg);
- return newReg;
-}
-
-IMLName PPCRecompilerImlGen_GetRegName(ppcImlGenContext_t* ppcImlGenContext, IMLReg reg)
-{
- for (auto& it : ppcImlGenContext->mappedRegs)
- {
- if (it.second.GetRegID() == reg.GetRegID())
- return it.first;
- }
- cemu_assert(false);
return 0;
}
+uint32 PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName)
+{
+ for(uint32 i=0; i< PPC_REC_MAX_VIRTUAL_GPR; i++)
+ {
+ if( ppcImlGenContext->mappedRegister[i] == mappedName )
+ {
+ return i;
+ }
+ }
+ return PPC_REC_INVALID_REGISTER;
+}
+
uint32 PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName)
{
- DEBUG_BREAK;
- //if( mappedName == PPCREC_NAME_NONE )
- //{
- // debug_printf("PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(): Invalid mappedName parameter\n");
- // return PPC_REC_INVALID_REGISTER;
- //}
- //for(uint32 i=0; i<255; i++)
- //{
- // if( ppcImlGenContext->mappedFPRRegister[i] == PPCREC_NAME_NONE )
- // {
- // ppcImlGenContext->mappedFPRRegister[i] = mappedName;
- // return i;
- // }
- //}
+ if( mappedName == PPCREC_NAME_NONE )
+ {
+ debug_printf("PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(): Invalid mappedName parameter\n");
+ return PPC_REC_INVALID_REGISTER;
+ }
+ for(uint32 i=0; i<255; i++)
+ {
+ if( ppcImlGenContext->mappedFPRRegister[i] == PPCREC_NAME_NONE )
+ {
+ ppcImlGenContext->mappedFPRRegister[i] = mappedName;
+ return i;
+ }
+ }
return 0;
}
uint32 PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName)
{
- DEBUG_BREAK;
- //for(uint32 i=0; i<255; i++)
- //{
- // if( ppcImlGenContext->mappedFPRRegister[i] == mappedName )
- // {
- // return i;
- // }
- //}
+ for(uint32 i=0; i<255; i++)
+ {
+ if( ppcImlGenContext->mappedFPRRegister[i] == mappedName )
+ {
+ return i;
+ }
+ }
return PPC_REC_INVALID_REGISTER;
}
-IMLReg PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName)
+/*
+ * Loads a PPC gpr into any of the available IML registers
+ * If loadNew is false, it will reuse already loaded instances
+ */
+uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew)
{
- return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::I32);
+ if( loadNew == false )
+ {
+ uint32 loadedRegisterIndex = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, mappedName);
+ if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER )
+ return loadedRegisterIndex;
+ }
+ uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, mappedName);
+ return registerIndex;
}
-IMLReg _GetRegGPR(ppcImlGenContext_t* ppcImlGenContext, uint32 index)
+/*
+ * Reuse already loaded register if present
+ * Otherwise create new IML register and map the name. The register contents will be undefined
+ */
+uint32 PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName)
{
- cemu_assert_debug(index < 32);
- return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + index);
+ uint32 loadedRegisterIndex = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, mappedName);
+ if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER )
+ return loadedRegisterIndex;
+ uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, mappedName);
+ return registerIndex;
}
-IMLReg _GetRegCR(ppcImlGenContext_t* ppcImlGenContext, uint32 index)
+/*
+ * Loads a PPC fpr into any of the available IML FPU registers
+ * If loadNew is false, it will check first if the fpr is already loaded into any IML register
+ */
+uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew)
{
- cemu_assert_debug(index < 32);
- return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + index);
+ if( loadNew == false )
+ {
+ uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName);
+ if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER )
+ return loadedRegisterIndex;
+ }
+ uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName);
+ return registerIndex;
}
-IMLReg _GetRegCR(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit)
+/*
+ * Checks if a PPC fpr register is already loaded into any IML register
+ * If no, it will create a new undefined temporary IML FPU register and map the name (effectively overwriting the old ppc register)
+ */
+uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName)
{
- cemu_assert_debug(crReg < 8);
- cemu_assert_debug(crBit < 4);
- return _GetRegCR(ppcImlGenContext, (crReg * 4) + crBit);
+ uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName);
+ if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER )
+ return loadedRegisterIndex;
+ uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName);
+ return registerIndex;
}
-IMLReg _GetRegTemporary(ppcImlGenContext_t* ppcImlGenContext, uint32 index)
+void PPCRecompilerImlGen_TW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
- cemu_assert_debug(index < 4);
- return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + index);
+//#ifdef CEMU_DEBUG_ASSERT
+// PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0);
+//#endif
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_LEAVE, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0);
}
-// get throw-away register
-// be careful to not collide with other temporary register
-IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index)
+bool PPCRecompilerImlGen_MTSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
- cemu_assert_debug(index < 4);
- return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + index);
+ uint32 rD, spr1, spr2, spr;
+ PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2);
+ spr = spr1 | (spr2<<5);
+ if (spr == SPR_CTR || spr == SPR_LR)
+ {
+ uint32 gprReg = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0 + rD);
+ if (gprReg == PPC_REC_INVALID_REGISTER)
+ gprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
+ uint32 sprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr);
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, sprReg, gprReg);
+ }
+ else if (spr >= SPR_UGQR0 && spr <= SPR_UGQR7)
+ {
+ uint32 gprReg = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0 + rD);
+ if (gprReg == PPC_REC_INVALID_REGISTER)
+ gprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
+ uint32 sprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr);
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, sprReg, gprReg);
+ ppcImlGenContext->tracking.modifiesGQR[spr - SPR_UGQR0] = true;
+ }
+ else
+ return false;
+ return true;
+}
+
+bool PPCRecompilerImlGen_MFSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ uint32 rD, spr1, spr2, spr;
+ PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2);
+ spr = spr1 | (spr2<<5);
+ if (spr == SPR_LR || spr == SPR_CTR)
+ {
+ uint32 sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr);
+ uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprReg, sprReg);
+ }
+ else if (spr >= SPR_UGQR0 && spr <= SPR_UGQR7)
+ {
+ uint32 sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr);
+ uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprReg, sprReg);
+ }
+ else
+ return false;
+ return true;
+}
+
+bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ uint32 rD, spr1, spr2, spr;
+ PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2);
+ spr = spr1 | (spr2<<5);
+
+ if (spr == 268 || spr == 269)
+ {
+ // TBL / TBU
+ uint32 param2 = spr | (rD << 16);
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_MFTB, ppcImlGenContext->ppcAddressOfCurrentInstruction, param2, 0);
+ return true;
+ }
+ return false;
+}
+
+bool PPCRecompilerImlGen_MFCR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rD, rA, rB;
+ PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
+ uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_MFCR, gprReg, 0, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
+ return true;
+}
+
+bool PPCRecompilerImlGen_MTCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ uint32 rS;
+ uint32 crMask;
+ PPC_OPC_TEMPL_XFX(opcode, rS, crMask);
+ uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS);
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_MTCRF, gprReg, crMask, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
+ return true;
+}
+
+void PPCRecompilerImlGen_CMP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ uint32 cr;
+ int rA, rB;
+ PPC_OPC_TEMPL_X(opcode, cr, rA, rB);
+ cr >>= 2;
+ uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_COMPARE_SIGNED, gprRegisterA, gprRegisterB, cr, PPCREC_CR_MODE_COMPARE_SIGNED);
+}
+
+void PPCRecompilerImlGen_CMPL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ uint32 cr;
+ int rA, rB;
+ PPC_OPC_TEMPL_X(opcode, cr, rA, rB);
+ cr >>= 2;
+ uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegisterA, gprRegisterB, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED);
+}
+
+void PPCRecompilerImlGen_CMPI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ uint32 cr;
+ int rA;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_SImm(opcode, cr, rA, imm);
+ cr >>= 2;
+ sint32 b = imm;
+ // load gpr into register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_COMPARE_SIGNED, gprRegister, b, 0, false, false, cr, PPCREC_CR_MODE_COMPARE_SIGNED);
+}
+
+void PPCRecompilerImlGen_CMPLI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ uint32 cr;
+ int rA;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_UImm(opcode, cr, rA, imm);
+ cr >>= 2;
+ uint32 b = imm;
+ // load gpr into register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegister, (sint32)b, 0, false, false, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED);
}
bool PPCRecompiler_canInlineFunction(MPTR functionPtr, sint32* functionInstructionCount)
{
for (sint32 i = 0; i < 6; i++)
{
- uint32 opcode = memory_readU32(functionPtr + i * 4);
+ uint32 opcode = memory_readU32(functionPtr+i*4);
switch ((opcode >> 26))
{
case 14: // ADDI
@@ -393,220 +611,18 @@ void PPCRecompiler_generateInlinedCode(ppcImlGenContext_t* ppcImlGenContext, uin
{
for (sint32 i = 0; i < instructionCount; i++)
{
- ppcImlGenContext->ppcAddressOfCurrentInstruction = startAddress + i * 4;
+ ppcImlGenContext->ppcAddressOfCurrentInstruction = startAddress + i*4;
ppcImlGenContext->cyclesSinceLastBranch++;
if (PPCRecompiler_decodePPCInstruction(ppcImlGenContext))
{
- cemu_assert_suspicious();
+ assert_dbg();
}
}
// add range
- cemu_assert_unimplemented();
- //ppcRecRange_t recRange;
- //recRange.ppcAddress = startAddress;
- //recRange.ppcSize = instructionCount*4 + 4; // + 4 because we have to include the BLR
- //ppcImlGenContext->functionRef->list_ranges.push_back(recRange);
-}
-
-// for handling RC bit of many instructions
-void PPCImlGen_UpdateCR0(ppcImlGenContext_t* ppcImlGenContext, IMLReg regR)
-{
- IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_LT);
- IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_GT);
- IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_EQ);
- // todo - SO bit
-
- ppcImlGenContext->emitInst().make_compare_s32(regR, 0, crBitRegLT, IMLCondition::SIGNED_LT);
- ppcImlGenContext->emitInst().make_compare_s32(regR, 0, crBitRegGT, IMLCondition::SIGNED_GT);
- ppcImlGenContext->emitInst().make_compare_s32(regR, 0, crBitRegEQ, IMLCondition::EQ);
-
- //ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, crBitRegSO, 0); // todo - copy from XER
-
- //ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerR, registerR, 0, PPCREC_CR_MODE_LOGICAL);
-}
-
-void PPCRecompilerImlGen_TW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
-{
- // split before and after to make sure the macro is in an isolated segment that we can make enterable
- PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock);
- ppcImlGenContext->currentOutputSegment->SetEnterable(ppcImlGenContext->ppcAddressOfCurrentInstruction);
- PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext)->make_macro(PPCREC_IML_MACRO_LEAVE, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0, IMLREG_INVALID);
- IMLSegment* middleSeg = PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock);
- middleSeg->SetLinkBranchTaken(nullptr);
- middleSeg->SetLinkBranchNotTaken(nullptr);
-}
-
-bool PPCRecompilerImlGen_MTSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
-{
- uint32 rD, spr1, spr2, spr;
- PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2);
- spr = spr1 | (spr2<<5);
- IMLReg gprReg = _GetRegGPR(ppcImlGenContext, rD);
- if (spr == SPR_CTR || spr == SPR_LR)
- {
- IMLReg sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, sprReg, gprReg);
- }
- else if (spr >= SPR_UGQR0 && spr <= SPR_UGQR7)
- {
- IMLReg sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, sprReg, gprReg);
- ppcImlGenContext->tracking.modifiesGQR[spr - SPR_UGQR0] = true;
- }
- else
- return false;
- return true;
-}
-
-bool PPCRecompilerImlGen_MFSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
-{
- uint32 rD, spr1, spr2, spr;
- PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2);
- spr = spr1 | (spr2<<5);
- IMLReg gprReg = _GetRegGPR(ppcImlGenContext, rD);
- if (spr == SPR_LR || spr == SPR_CTR)
- {
- IMLReg sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprReg, sprReg);
- }
- else if (spr >= SPR_UGQR0 && spr <= SPR_UGQR7)
- {
- IMLReg sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprReg, sprReg);
- }
- else
- return false;
- return true;
-}
-
-ATTR_MS_ABI uint32 PPCRecompiler_GetTBL()
-{
- return (uint32)coreinit::OSGetSystemTime();
-}
-
-ATTR_MS_ABI uint32 PPCRecompiler_GetTBU()
-{
- return (uint32)(coreinit::OSGetSystemTime() >> 32);
-}
-
-bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
-{
- uint32 rD, spr1, spr2, spr;
- PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2);
- spr = spr1 | (spr2<<5);
-
- if( spr == SPR_TBL || spr == SPR_TBU )
- {
- IMLReg resultReg = _GetRegGPR(ppcImlGenContext, rD);
- ppcImlGenContext->emitInst().make_call_imm(spr == SPR_TBL ? (uintptr_t)PPCRecompiler_GetTBL : (uintptr_t)PPCRecompiler_GetTBU, IMLREG_INVALID, IMLREG_INVALID, IMLREG_INVALID, resultReg);
- return true;
- }
- return false;
-}
-
-void PPCRecompilerImlGen_MCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
-{
- uint32 crD, crS, b;
- PPC_OPC_TEMPL_X(opcode, crD, crS, b);
- cemu_assert_debug((crD&3) == 0);
- cemu_assert_debug((crS&3) == 0);
- crD >>= 2;
- crS >>= 2;
- for (sint32 i = 0; i<4; i++)
- {
- IMLReg regCrSrcBit = _GetRegCR(ppcImlGenContext, crS * 4 + i);
- IMLReg regCrDstBit = _GetRegCR(ppcImlGenContext, crD * 4 + i);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regCrDstBit, regCrSrcBit);
- }
-}
-
-bool PPCRecompilerImlGen_MFCR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
-{
- sint32 rD, rA, rB;
- PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
- IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
- ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regD, 0);
- for (sint32 i = 0; i < 32; i++)
- {
- IMLReg regCrBit = _GetRegCR(ppcImlGenContext, i);
- cemu_assert_debug(regCrBit.GetRegFormat() == IMLRegFormat::I32); // addition is only allowed between same-format regs
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, regD, regD, 1);
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regD, regD, regCrBit);
- }
- return true;
-}
-
-bool PPCRecompilerImlGen_MTCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
-{
- uint32 rS;
- uint32 crMask;
- PPC_OPC_TEMPL_XFX(opcode, rS, crMask);
- IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
- IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0);
- uint32 crBitMask = ppc_MTCRFMaskToCRBitMask(crMask);
- for (sint32 f = 0; f < 32; f++)
- {
- if(((crBitMask >> f) & 1) == 0)
- continue;
- IMLReg regCrBit = _GetRegCR(ppcImlGenContext, f);
- cemu_assert_debug(regCrBit.GetRegFormat() == IMLRegFormat::I32);
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, regTmp, regS, (31-f));
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regCrBit, regTmp, 1);
- }
- return true;
-}
-
-void PPCRecompilerImlGen_CMP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isUnsigned)
-{
- uint32 cr;
- int rA, rB;
- PPC_OPC_TEMPL_X(opcode, cr, rA, rB);
- cr >>= 2;
-
- IMLReg gprRegisterA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg gprRegisterB = _GetRegGPR(ppcImlGenContext, rB);
- IMLReg regXerSO = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_SO);
-
- IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_LT);
- IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_GT);
- IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_EQ);
- IMLReg crBitRegSO = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_SO);
-
- ppcImlGenContext->emitInst().make_compare(gprRegisterA, gprRegisterB, crBitRegLT, isUnsigned ? IMLCondition::UNSIGNED_LT : IMLCondition::SIGNED_LT);
- ppcImlGenContext->emitInst().make_compare(gprRegisterA, gprRegisterB, crBitRegGT, isUnsigned ? IMLCondition::UNSIGNED_GT : IMLCondition::SIGNED_GT);
- ppcImlGenContext->emitInst().make_compare(gprRegisterA, gprRegisterB, crBitRegEQ, IMLCondition::EQ);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, crBitRegSO, regXerSO);
-}
-
-bool PPCRecompilerImlGen_CMPI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isUnsigned)
-{
- uint32 cr;
- int rA;
- uint32 imm;
- if (isUnsigned)
- {
- PPC_OPC_TEMPL_D_UImm(opcode, cr, rA, imm);
- }
- else
- {
- PPC_OPC_TEMPL_D_SImm(opcode, cr, rA, imm);
- }
- cr >>= 2;
-
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regXerSO = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_SO);
-
- IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_LT);
- IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_GT);
- IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_EQ);
- IMLReg crBitRegSO = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_SO);
-
- ppcImlGenContext->emitInst().make_compare_s32(regA, (sint32)imm, crBitRegLT, isUnsigned ? IMLCondition::UNSIGNED_LT : IMLCondition::SIGNED_LT);
- ppcImlGenContext->emitInst().make_compare_s32(regA, (sint32)imm, crBitRegGT, isUnsigned ? IMLCondition::UNSIGNED_GT : IMLCondition::SIGNED_GT);
- ppcImlGenContext->emitInst().make_compare_s32(regA, (sint32)imm, crBitRegEQ, IMLCondition::EQ);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, crBitRegSO, regXerSO);
-
- return true;
+ ppcRecRange_t recRange;
+ recRange.ppcAddress = startAddress;
+ recRange.ppcSize = instructionCount*4 + 4; // + 4 because we have to include the BLR
+ ppcImlGenContext->functionRef->list_ranges.push_back(recRange);
}
bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
@@ -621,26 +637,43 @@ bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
if( opcode&PPC_OPC_LK )
{
// function call
- ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch, IMLREG_INVALID);
+ // check if function can be inlined
+ sint32 inlineFuncInstructionCount = 0;
+ if (PPCRecompiler_canInlineFunction(jumpAddressDest, &inlineFuncInstructionCount))
+ {
+ // generate NOP iml instead of BL macro (this assures that segment PPC range remains intact)
+ PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext, NULL);
+ //cemuLog_log(LogType::Force, "Inline func 0x{:08x} at {:08x}", jumpAddressDest, ppcImlGenContext->ppcAddressOfCurrentInstruction);
+ uint32* prevInstructionPtr = ppcImlGenContext->currentInstruction;
+ ppcImlGenContext->currentInstruction = (uint32*)memory_getPointerFromVirtualOffset(jumpAddressDest);
+ PPCRecompiler_generateInlinedCode(ppcImlGenContext, jumpAddressDest, inlineFuncInstructionCount);
+ ppcImlGenContext->currentInstruction = prevInstructionPtr;
+ return true;
+ }
+ // generate funtion call instructions
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch);
+ PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4);
return true;
}
// is jump destination within recompiled function?
- if (ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest))
- ppcImlGenContext->emitInst().make_jump();
+ if( jumpAddressDest >= ppcImlGenContext->functionRef->ppcAddress && jumpAddressDest < (ppcImlGenContext->functionRef->ppcAddress + ppcImlGenContext->functionRef->ppcSize) )
+ {
+ // generate instruction
+ PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext, NULL, jumpAddressDest);
+ }
else
- ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch, IMLREG_INVALID);
+ {
+ // todo: Inline this jump destination if possible (in many cases it's a bunch of GPR/FPR store instructions + BLR)
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch);
+ }
return true;
}
bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
- PPCIMLGen_AssertIfNotLastSegmentInstruction(*ppcImlGenContext);
-
uint32 BO, BI, BD;
PPC_OPC_TEMPL_B(opcode, BO, BI, BD);
- Espresso::BOField boField(BO);
-
uint32 crRegister = BI/4;
uint32 crBit = BI%4;
uint32 jumpCondition = 0;
@@ -649,10 +682,6 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
bool decrementerMustBeZero = (BO&2)!=0; // bit set -> branch if CTR = 0, bit not set -> branch if CTR != 0
bool ignoreCondition = (BO&16)!=0;
- IMLReg regCRBit;
- if (!ignoreCondition)
- regCRBit = _GetRegCR(ppcImlGenContext, crRegister, crBit);
-
uint32 jumpAddressDest = BD;
if( (opcode&PPC_OPC_AA) == 0 )
{
@@ -661,15 +690,37 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
if( opcode&PPC_OPC_LK )
{
- if (useDecrementer)
- return false;
// conditional function calls are not supported
if( ignoreCondition == false )
{
- PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock;
- IMLSegment* blSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock);
- ppcImlGenContext->emitInst().make_conditional_jump(regCRBit, conditionMustBeTrue);
- blSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch, IMLREG_INVALID);
+ // generate jump condition
+ if( conditionMustBeTrue )
+ {
+ if( crBit == 0 )
+ jumpCondition = PPCREC_JUMP_CONDITION_GE;
+ else if( crBit == 1 )
+ jumpCondition = PPCREC_JUMP_CONDITION_LE;
+ else if( crBit == 2 )
+ jumpCondition = PPCREC_JUMP_CONDITION_NE;
+ else if( crBit == 3 )
+ jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW;
+ }
+ else
+ {
+ if( crBit == 0 )
+ jumpCondition = PPCREC_JUMP_CONDITION_L;
+ else if( crBit == 1 )
+ jumpCondition = PPCREC_JUMP_CONDITION_G;
+ else if( crBit == 2 )
+ jumpCondition = PPCREC_JUMP_CONDITION_E;
+ else if( crBit == 3 )
+ jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW;
+ }
+ // generate instruction
+ //PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0);
+ PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4, jumpCondition, crRegister, crBit, !conditionMustBeTrue);
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch);
+ PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4);
return true;
}
return false;
@@ -679,11 +730,12 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
if( ignoreCondition == false )
return false; // not supported for the moment
- IMLReg ctrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_CTR);
- IMLReg tmpBoolReg = _GetRegTemporaryS8(ppcImlGenContext, 1);
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_SUB, ctrRegister, ctrRegister, 1);
- ppcImlGenContext->emitInst().make_compare_s32(ctrRegister, 0, tmpBoolReg, decrementerMustBeZero ? IMLCondition::EQ : IMLCondition::NEQ);
- ppcImlGenContext->emitInst().make_conditional_jump(tmpBoolReg, true);
+ uint32 ctrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_CTR, false);
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_SUB, ctrRegister, 1, 0, false, false, PPCREC_CR_REG_TEMP, PPCREC_CR_MODE_ARITHMETIC);
+ if( decrementerMustBeZero )
+ PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, jumpAddressDest, PPCREC_JUMP_CONDITION_E, PPCREC_CR_REG_TEMP, 0, false);
+ else
+ PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, jumpAddressDest, PPCREC_JUMP_CONDITION_NE, PPCREC_CR_REG_TEMP, 0, false);
return true;
}
else
@@ -691,90 +743,219 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
if( ignoreCondition )
{
// branch always, no condition and no decrementer
- // not supported
- return false;
+ debugBreakpoint();
+ crRegister = PPC_REC_INVALID_REGISTER; // not necessary but lets optimizer know we dont care for cr register on this instruction
}
else
{
- if (ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest))
+ // generate jump condition
+ if( conditionMustBeTrue )
+ {
+ if( crBit == 0 )
+ jumpCondition = PPCREC_JUMP_CONDITION_GE;
+ else if( crBit == 1 )
+ jumpCondition = PPCREC_JUMP_CONDITION_LE;
+ else if( crBit == 2 )
+ jumpCondition = PPCREC_JUMP_CONDITION_NE;
+ else if( crBit == 3 )
+ jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW;
+ }
+ else
+ {
+ if( crBit == 0 )
+ jumpCondition = PPCREC_JUMP_CONDITION_L;
+ else if( crBit == 1 )
+ jumpCondition = PPCREC_JUMP_CONDITION_G;
+ else if( crBit == 2 )
+ jumpCondition = PPCREC_JUMP_CONDITION_E;
+ else if( crBit == 3 )
+ jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW;
+ }
+
+ if (jumpAddressDest >= ppcImlGenContext->functionRef->ppcAddress && jumpAddressDest < (ppcImlGenContext->functionRef->ppcAddress + ppcImlGenContext->functionRef->ppcSize))
{
// near jump
- ppcImlGenContext->emitInst().make_conditional_jump(regCRBit, conditionMustBeTrue);
+ PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, jumpAddressDest, jumpCondition, crRegister, crBit, conditionMustBeTrue);
}
else
{
// far jump
- debug_printf("PPCRecompilerImlGen_BC(): Far jump not supported yet");
- return false;
+ PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, jumpCondition, crRegister, crBit, !conditionMustBeTrue);
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch);
+ PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4);
}
}
}
return true;
}
-// BCCTR or BCLR
-bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 sprReg)
+bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
- PPCIMLGen_AssertIfNotLastSegmentInstruction(*ppcImlGenContext);
+ uint32 BO, BI, BD;
+ PPC_OPC_TEMPL_XL(opcode, BO, BI, BD);
- Espresso::BOField BO;
- uint32 BI;
- bool LK;
- Espresso::decodeOp_BCSPR(opcode, BO, BI, LK);
uint32 crRegister = BI/4;
uint32 crBit = BI%4;
- IMLReg regCRBit;
- if (!BO.conditionIgnore())
- regCRBit = _GetRegCR(ppcImlGenContext, crRegister, crBit);
+ uint32 jumpCondition = 0;
- IMLReg branchDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + sprReg);
- if (LK)
+ bool conditionMustBeTrue = (BO&8)!=0;
+ bool useDecrementer = (BO&4)==0; // bit not set -> decrement
+ bool decrementerMustBeZero = (BO&2)!=0; // bit set -> branch if CTR = 0, bit not set -> branch if CTR != 0
+ bool ignoreCondition = (BO&16)!=0;
+ bool saveLR = (opcode&PPC_OPC_LK)!=0;
+ // since we skip this instruction if the condition is true, we need to invert the logic
+ bool invertedConditionMustBeTrue = !conditionMustBeTrue;
+ if( useDecrementer )
{
- if (sprReg == SPR_LR)
- {
- // if the branch target is LR, then preserve it in a temporary
- cemu_assert_suspicious(); // this case needs testing
- IMLReg tmpRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, tmpRegister, branchDestReg);
- branchDestReg = tmpRegister;
- }
- IMLReg registerLR = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR);
- ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerLR, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4);
- }
-
- if (!BO.decrementerIgnore())
- {
- cemu_assert_unimplemented();
- return false;
- }
- else if (!BO.conditionIgnore())
- {
- // no decrementer but CR check
- cemu_assert_debug(ppcImlGenContext->currentBasicBlock->hasContinuedFlow);
- cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget);
- PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock;
- IMLSegment* bctrSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock);
- ppcImlGenContext->emitInst().make_conditional_jump(regCRBit, !BO.conditionInverted());
- bctrSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_B_TO_REG, 0, 0, 0, branchDestReg);
+ cemu_assert_debug(false);
+ return false; // unsupported
}
else
{
- // branch always, no condition and no decrementer check
- cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasContinuedFlow);
- cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget);
- ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_TO_REG, 0, 0, 0, branchDestReg);
+ if( ignoreCondition )
+ {
+ // store LR
+ if( saveLR )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BLRL, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch);
+ PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4);
+ }
+ else
+ {
+ // branch always, no condition and no decrementer
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BLR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch);
+ }
+ }
+ else
+ {
+ // store LR
+ if( saveLR )
+ {
+ uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_LR);
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction+4)&0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
+ }
+ // generate jump condition
+ if( invertedConditionMustBeTrue )
+ {
+ if( crBit == 0 )
+ jumpCondition = PPCREC_JUMP_CONDITION_L;
+ else if( crBit == 1 )
+ jumpCondition = PPCREC_JUMP_CONDITION_G;
+ else if( crBit == 2 )
+ jumpCondition = PPCREC_JUMP_CONDITION_E;
+ else if( crBit == 3 )
+ jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW;
+ }
+ else
+ {
+ if( crBit == 0 )
+ jumpCondition = PPCREC_JUMP_CONDITION_GE;
+ else if( crBit == 1 )
+ jumpCondition = PPCREC_JUMP_CONDITION_LE;
+ else if( crBit == 2 )
+ jumpCondition = PPCREC_JUMP_CONDITION_NE;
+ else if( crBit == 3 )
+ jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW;
+ }
+ // jump if BCLR condition NOT met (jump to jumpmark of next instruction, essentially skipping current instruction)
+ PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4, jumpCondition, crRegister, crBit, invertedConditionMustBeTrue);
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BLR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch);
+ }
+ }
+ return true;
+}
+
+bool PPCRecompilerImlGen_BCCTR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+	uint32 BO, BI, BD;
+	PPC_OPC_TEMPL_XL(opcode, BO, BI, BD);
+	// BCCTR/BCCTRL: emits IML for a branch to CTR; returns false for the CTR-decrementing forms, true otherwise
+	uint32 crRegister = BI/4;
+	uint32 crBit = BI%4;
+
+	uint32 jumpCondition = 0;
+
+	bool conditionMustBeTrue = (BO&8)!=0;
+	bool useDecrementer = (BO&4)==0; // bit not set -> decrement
+	bool decrementerMustBeZero = (BO&2)!=0; // bit set -> branch if CTR = 0, bit not set -> branch if CTR != 0
+	bool ignoreCondition = (BO&16)!=0;
+	bool saveLR = (opcode&PPC_OPC_LK)!=0;
+	// the emitted conditional jump skips the BCTR when the branch condition is NOT met, so the tested condition is inverted
+	bool invertedConditionMustBeTrue = !conditionMustBeTrue;
+	if( useDecrementer )
+	{
+		assert_dbg();
+		// if added, don't forget the inverted logic
+		debug_printf("Rec: BCCTR unsupported decrementer\n");
+		return false; // unsupported
+	}
+	else
+	{
+		if( ignoreCondition )
+		{
+			// store LR
+			if( saveLR )
+			{
+				uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_LR);
+				PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction+4)&0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
+				PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BCTRL, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch);
+				PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4);
+			}
+			else
+			{
+				// branch always, no condition and no decrementer
+				PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BCTR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch);
+			}
+		}
+		else
+		{
+			// store LR
+			if( saveLR )
+			{
+				uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_LR);
+				PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction+4)&0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
+			}
+			// generate jump condition
+			if( invertedConditionMustBeTrue )
+			{
+				if( crBit == 0 )
+					jumpCondition = PPCREC_JUMP_CONDITION_L;
+				else if( crBit == 1 )
+					jumpCondition = PPCREC_JUMP_CONDITION_G;
+				else if( crBit == 2 )
+					jumpCondition = PPCREC_JUMP_CONDITION_E;
+				else if( crBit == 3 )
+					jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW;
+			}
+			else
+			{
+				if( crBit == 0 )
+					jumpCondition = PPCREC_JUMP_CONDITION_GE;
+				else if( crBit == 1 )
+					jumpCondition = PPCREC_JUMP_CONDITION_LE;
+				else if( crBit == 2 )
+					jumpCondition = PPCREC_JUMP_CONDITION_NE;
+				else if( crBit == 3 )
+					jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW;
+			}
+			// jump if BCCTR condition NOT met (jump to jumpmark of next instruction, essentially skipping current instruction)
+			PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4, jumpCondition, crRegister, crBit, invertedConditionMustBeTrue);
+			PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BCTR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch);
+		}
+	}
 	return true;
 }
bool PPCRecompilerImlGen_ISYNC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
+ // does not need to be translated
return true;
}
bool PPCRecompilerImlGen_SYNC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
+ // does not need to be translated
return true;
}
@@ -782,12 +963,102 @@ bool PPCRecompilerImlGen_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode
{
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
- IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regD, regA, regB);
- if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regD);
+ //hCPU->gpr[rD] = (int)hCPU->gpr[rA] + (int)hCPU->gpr[rB];
+ uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( opcode&PPC_OPC_RC )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ADD, registerRD, registerRA, registerRB, 0, PPCREC_CR_MODE_LOGICAL);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ADD, registerRD, registerRA, registerRB);
+ }
+ return true;
+}
+
+bool PPCRecompilerImlGen_ADDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+	sint32 rD, rA, rB;
+	PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
+	// ADDC: rD = rA + rB with the carry updated (interpreter: hCPU->gpr[rD] = (int)hCPU->gpr[rA] + (int)hCPU->gpr[rB]); always returns true
+	uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+	uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+	uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
+	if( opcode&PPC_OPC_RC ) // Rc form: additionally passes CR index 0 with PPCREC_CR_MODE_LOGICAL (presumably a CR0 update - confirm against the emitter)
+		PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, registerRB, 0, PPCREC_CR_MODE_LOGICAL);
+	else
+		PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, registerRB);
+	return true;
+}
+
+bool PPCRecompilerImlGen_ADDE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+	sint32 rD, rA, rB;
+	PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
+	// ADDE, interpreter equivalent: hCPU->gpr[rD] = hCPU->gpr[rA] + hCPU->gpr[rB] + ca; always returns true
+	uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+	uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+	uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
+	if( opcode&PPC_OPC_RC ) // note: operands are passed as (rB, rA) in both branches; the Rc form also passes CR index 0 with PPCREC_CR_MODE_LOGICAL
+		PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA, 0, PPCREC_CR_MODE_LOGICAL);
+	else
+		PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA);
+	return true;
+}
+
+bool PPCRecompilerImlGen_ADDZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+	sint32 rD, rA, rB;
+	PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
+	PPC_ASSERT(rB == 0); // the rB field is expected to be zero for this opcode
+	// ADDZE, interpreter equivalent:
+	//   uint32 a = hCPU->gpr[rA]; uint32 ca = hCPU->xer_ca;
+	//   hCPU->gpr[rD] = a + ca;
+
+	uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+	uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
+	// copy rA into rD first (skipped when both resolve to the same IML register), then add the carry to rD in place
+	if( registerRA != registerRD )
+	{
+		PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, registerRD, registerRA);
+	}
+	if( opcode&PPC_OPC_RC )
+	{
+		PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD_CARRY, registerRD, registerRD, 0, PPCREC_CR_MODE_LOGICAL);
+	}
+	else
+	{
+		PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD_CARRY, registerRD, registerRD);
+	}
+	return true;
+}
+
+bool PPCRecompilerImlGen_ADDME(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+	sint32 rD, rA, rB;
+	PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
+	PPC_ASSERT(rB == 0); // the rB field is expected to be zero for this opcode
+	// ADDME, interpreter equivalent:
+	//   uint32 a = hCPU->gpr[rA]; uint32 ca = hCPU->xer_ca;
+	//   hCPU->gpr[rD] = a + ca + -1;
+
+	uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+	uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
+	// copy rA into rD first (skipped when both resolve to the same IML register), then apply ADD_CARRY_ME to rD in place
+	if( registerRA != registerRD )
+	{
+		PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, registerRD, registerRA);
+	}
+	if( opcode&PPC_OPC_RC )
+	{
+		PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD_CARRY_ME, registerRD, registerRD, 0, PPCREC_CR_MODE_LOGICAL);
+	}
+	else
+	{
+		PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD_CARRY_ME, registerRD, registerRD);
+	}
 	return true;
 }
@@ -796,16 +1067,22 @@ bool PPCRecompilerImlGen_ADDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
sint32 rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
- IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
- if (rA != 0)
+ //hCPU->gpr[rD] = (rA ? (int)hCPU->gpr[rA] : 0) + (int)imm;
+ if( rA != 0 )
{
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, regD, regA, imm);
+ uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // check if rD is already loaded, else use new temporary register
+ uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, registerRD, registerRA, imm);
}
else
{
- ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regD, imm);
+ // rA not used, instruction is value assignment
+ // rD = imm
+ uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerRD, imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
}
+ // never updates any cr
return true;
}
@@ -814,88 +1091,49 @@ bool PPCRecompilerImlGen_ADDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
int rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_Shift16(opcode, rD, rA, imm);
- IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
- if (rA != 0)
+ if( rA != 0 )
{
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, regD, regA, (sint32)imm);
+ uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // check if rD is already loaded, else use new temporary register
+ uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, registerRD, registerRA, (sint32)imm);
}
else
{
- ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regD, (sint32)imm);
+ // rA not used, instruction turns into simple value assignment
+ // rD = imm
+ uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerRD, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
}
+ // never updates any cr
return true;
}
-bool PPCRecompilerImlGen_ADDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
-{
- // r = a + b -> update carry
- sint32 rD, rA, rB;
- PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
- IMLReg regRA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regRB = _GetRegGPR(ppcImlGenContext, rB);
- IMLReg regRD = _GetRegGPR(ppcImlGenContext, rD);
- IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
- ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD, regRD, regRA, regRB, regCa);
- if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regRD);
- return true;
-}
-
-bool PPCRecompilerImlGen_ADDIC_(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool updateCR0)
+bool PPCRecompilerImlGen_ADDIC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
sint32 rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
- IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
- ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD, regD, regA, (sint32)imm, regCa);
- if(updateCR0)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regD);
+ // rD = rA + imm;
+ uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // check if rD is already loaded, else use new temporary register
+ uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, imm);
+ // never updates any cr
return true;
}
-bool PPCRecompilerImlGen_ADDE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+bool PPCRecompilerImlGen_ADDIC_(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
- // r = a + b + carry -> update carry
- sint32 rD, rA, rB;
- PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
- IMLReg regRA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regRB = _GetRegGPR(ppcImlGenContext, rB);
- IMLReg regRD = _GetRegGPR(ppcImlGenContext, rD);
- IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
- ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regRD, regRA, regRB, regCa);
- if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regRD);
- return true;
-}
-
-bool PPCRecompilerImlGen_ADDZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
-{
- // r = a + carry -> update carry
- sint32 rD, rA, rB;
- PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
- IMLReg regRA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regRD = _GetRegGPR(ppcImlGenContext, rD);
- IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
- ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regRD, regRA, 0, regCa);
- if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regRD);
- return true;
-}
-
-bool PPCRecompilerImlGen_ADDME(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
-{
- // r = a + 0xFFFFFFFF + carry -> update carry
- sint32 rD, rA, rB;
- PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
- IMLReg regRA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regRD = _GetRegGPR(ppcImlGenContext, rD);
- IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
- ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regRD, regRA, -1, regCa);
- if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regRD);
+ // this opcode is identical to ADDIC but additionally it updates CR0
+ sint32 rD, rA;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
+ // rD = rA + imm;
+ uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // check if rD is already loaded, else use new temporary register
+ uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, imm, 0, PPCREC_CR_MODE_LOGICAL);
return true;
}
@@ -903,79 +1141,74 @@ bool PPCRecompilerImlGen_SUBF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
{
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
- // rD = ~rA + rB + 1
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
- IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB, regD, regB, regA);
- if ((opcode & PPC_OPC_RC))
- PPCImlGen_UpdateCR0(ppcImlGenContext, regD);
+ // hCPU->gpr[rD] = ~hCPU->gpr[rA] + hCPU->gpr[rB] + 1;
+ // rD = rB - rA
+ uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( opcode&PPC_OPC_RC )
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SUB, registerRD, registerRB, registerRA, 0, PPCREC_CR_MODE_LOGICAL);
+ else
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SUB, registerRD, registerRB, registerRA);
return true;
}
bool PPCRecompilerImlGen_SUBFE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
- // d = ~a + b + ca;
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
- IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
- IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
- IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA);
- ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, regB, regCa);
- if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regD);
+ // hCPU->gpr[rD] = ~hCPU->gpr[rA] + hCPU->gpr[rB] + ca;
+ uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( opcode&PPC_OPC_RC )
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA, 0, PPCREC_CR_MODE_LOGICAL);
+ else
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA);
return true;
}
bool PPCRecompilerImlGen_SUBFZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
- // d = ~a + ca;
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
- IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
- IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA);
- ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, 0, regCa);
- if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regD);
+ if( rB != 0 )
+ debugBreakpoint();
+ uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( opcode&PPC_OPC_RC )
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRA, 0, PPCREC_CR_MODE_LOGICAL);
+ else
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRA);
return true;
}
bool PPCRecompilerImlGen_SUBFC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
- // d = ~a + b + 1;
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
- IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
- IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
- IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA);
- ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCa, 1); // set input carry to simulate offset of 1
- ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, regB, regCa);
- if ((opcode & PPC_OPC_RC))
- PPCImlGen_UpdateCR0(ppcImlGenContext, regD);
+ // hCPU->gpr[rD] = ~hCPU->gpr[rA] + hCPU->gpr[rB] + 1;
+ // rD = rB - rA
+ uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SUBFC, registerRD, registerRA, registerRB);
+ if (opcode & PPC_OPC_RC)
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, registerRD, registerRD, 0, PPCREC_CR_MODE_LOGICAL);
return true;
}
bool PPCRecompilerImlGen_SUBFIC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
- // d = ~a + imm + 1
sint32 rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
- IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
- IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA);
- ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD, regD, regTmp, (sint32)imm + 1, regCa);
+ //uint32 a = hCPU->gpr[rA];
+ //hCPU->gpr[rD] = ~a + imm + 1;
+ // cr0 is never affected
+ uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_SUBFC, registerRD, registerRA, imm);
return true;
}
@@ -984,9 +1217,10 @@ bool PPCRecompilerImlGen_MULLI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
int rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
- IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_MULTIPLY_SIGNED, regD, regA, (sint32)imm);
+ // mulli instruction does not modify any flags
+ uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false);
+ uint32 registerOperand = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand, (sint32)imm);
return true;
}
@@ -994,16 +1228,18 @@ bool PPCRecompilerImlGen_MULLW(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
{
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
- IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
+ //hCPU->gpr[rD] = hCPU->gpr[rA] * hCPU->gpr[rB];
+ uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false);
+ uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
if (opcode & PPC_OPC_OE)
{
return false;
}
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_SIGNED, regD, regA, regB);
- if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regD);
+ if( opcode&PPC_OPC_RC )
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_LOGICAL);
+ else
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand1, registerOperand2);
return true;
}
@@ -1011,12 +1247,14 @@ bool PPCRecompilerImlGen_MULHW(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
{
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
- IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, regD, regA, regB);
- if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regD);
+ //hCPU->gpr[rD] = ((sint64)(sint32)hCPU->gpr[rA] * (sint64)(sint32)hCPU->gpr[rB])>>32;
+ uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false);
+ uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ if( opcode&PPC_OPC_RC )
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_LOGICAL);
+ else
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, registerResult, registerOperand1, registerOperand2);
return true;
}
@@ -1024,12 +1262,14 @@ bool PPCRecompilerImlGen_MULHWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
{
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
- IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, regD, regA, regB);
- if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regD);
+ //hCPU->gpr[rD] = (hCPU->gpr[rA] * hCPU->gpr[rB])>>32;
+ uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false);
+ uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ if( opcode&PPC_OPC_RC )
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_LOGICAL);
+ else
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, registerResult, registerOperand1, registerOperand2);
return true;
}
@@ -1037,12 +1277,18 @@ bool PPCRecompilerImlGen_DIVW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
{
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
- IMLReg regR = _GetRegGPR(ppcImlGenContext, rD);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_SIGNED, regR, regA, regB);
+ // hCPU->gpr[rD] = (sint32)a / (sint32)b;
+ uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false);
+ uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regR);
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_DIVIDE_SIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_ARITHMETIC);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_DIVIDE_SIGNED, registerResult, registerOperand1, registerOperand2);
+ }
return true;
}
@@ -1050,66 +1296,84 @@ bool PPCRecompilerImlGen_DIVWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
{
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
- IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_UNSIGNED, regD, regA, regB);
+ // hCPU->gpr[rD] = (uint32)a / (uint32)b;
+ uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false);
+ uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regD);
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_DIVIDE_UNSIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_ARITHMETIC);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_DIVIDE_UNSIGNED, registerResult, registerOperand1, registerOperand2);
+ }
return true;
}
bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
- sint32 rS, rA, SH, MB, ME;
+ int rS, rA, SH, MB, ME;
PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME);
uint32 mask = ppc_mask(MB, ME);
+ //uint32 v = ppc_word_rotl(hCPU->gpr[rS], SH);
+ //hCPU->gpr[rA] = v & mask;
- IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- if( ME == (31-SH) && MB == 0 )
+ uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false);
+ uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ // handle special forms of RLWINM
+ if( SH == 0 && SH == (ME-SH) && MB == 0 )
+ {
+ // CLRRWI
+ // todo
+ }
+ else if( ME == (31-SH) && MB == 0 )
{
// SLWI
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, regA, regS, SH);
+ if(opcode&PPC_OPC_RC)
+ PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_SHIFT, registerRA, registerRS, SH, 0, PPCREC_CR_MODE_LOGICAL);
+ else
+ PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_SHIFT, registerRA, registerRS, SH);
+ return true;
}
else if( SH == (32-MB) && ME == 31 )
{
// SRWI
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, regA, regS, MB);
+ if(opcode&PPC_OPC_RC)
+ PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_RIGHT_SHIFT, registerRA, registerRS, MB, 0, PPCREC_CR_MODE_LOGICAL);
+ else
+ PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_RIGHT_SHIFT, registerRA, registerRS, MB);
+ return true;
+ }
+ // general handler
+ if( registerRA != registerRS )
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, registerRA, registerRS);
+ if( SH != 0 )
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_ROTATE, registerRA, SH, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
+ if(opcode&PPC_OPC_RC)
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 0, false, false, 0, PPCREC_CR_MODE_LOGICAL);
}
else
{
- // general handler
- if (rA != rS)
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regA, regS);
- if (SH != 0)
- ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_LEFT_ROTATE, regA, SH);
- if (mask != 0xFFFFFFFF)
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regA, regA, (sint32)mask);
+ if( mask != 0xFFFFFFFF )
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
}
- if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
return true;
}
bool PPCRecompilerImlGen_RLWIMI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
- sint32 rS, rA, SH, MB, ME;
+ int rS, rA, SH, MB, ME;
PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME);
- IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
- IMLReg regR = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0);
- uint32 mask = ppc_mask(MB, ME);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regTmp, regS);
- if (SH)
- ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_LEFT_ROTATE, regTmp, SH);
- if (mask != 0)
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regR, regR, (sint32)~mask);
- if (mask != 0xFFFFFFFF)
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmp, regTmp, (sint32)mask);
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regR, regR, regTmp);
+
+ uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false);
+ uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ // pack RLWIMI parameters into single integer
+ uint32 vImm = MB|(ME<<8)|(SH<<16);
+ PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_RLWIMI, registerRA, registerRS, (sint32)vImm, PPC_REC_INVALID_REGISTER, 0);
if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regR);
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, registerRA, registerRA, 0, PPCREC_CR_MODE_LOGICAL);
return true;
}
@@ -1117,61 +1381,61 @@ bool PPCRecompilerImlGen_RLWNM(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
{
sint32 rS, rA, rB, MB, ME;
PPC_OPC_TEMPL_M(opcode, rS, rA, rB, MB, ME);
+ // uint32 v = ppc_word_rotl(hCPU->gpr[rS], hCPU->gpr[rB]);
uint32 mask = ppc_mask(MB, ME);
- IMLReg regS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
- IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
- IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_LEFT_ROTATE, regA, regS, regB);
- if( mask != 0xFFFFFFFF )
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regA, regA, (sint32)mask);
+ // uint32 v = ppc_word_rotl(hCPU->gpr[rS], hCPU->gpr[rB]);
+ // hCPU->gpr[rA] = v & mask;
+ uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false);
+ uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_LEFT_ROTATE, registerRA, registerRS, registerRB);
if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 32, false, false, 0, PPCREC_CR_MODE_LOGICAL);
+ }
+ else
+ {
+ if( mask != 0xFFFFFFFF )
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 32, false, false, PPC_REC_INVALID_REGISTER, 0);
+ }
return true;
}
bool PPCRecompilerImlGen_SRAW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
- // unlike SRAWI, for SRAW the shift range is 0-63 (masked to 6 bits)
- // but only shifts up to register bitwidth minus one are well defined in IML so this requires special handling for shifts >= 32
sint32 rS, rA, rB;
PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
- IMLReg regS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
- IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
- IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
- IMLReg regCarry = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
+ //uint32 SH = hCPU->gpr[rB] & 0x3f;
+ //hCPU->gpr[rA] = hCPU->gpr[rS];
+ //hCPU->xer_ca = 0;
+ //if (hCPU->gpr[rA] & 0x80000000) {
+ // uint32 ca = 0;
+ // for (uint32 i=0; i < SH; i++) {
+ // if (hCPU->gpr[rA] & 1) ca = 1;
+ // hCPU->gpr[rA] >>= 1;
+ // hCPU->gpr[rA] |= 0x80000000;
+ // }
+ // if (ca) hCPU->xer_ca = 1;
+ //} else {
+ // if (SH > 31) {
+ // hCPU->gpr[rA] = 0;
+ // } else {
+ // hCPU->gpr[rA] >>= SH;
+ // }
+ //}
+ //if (Opcode & PPC_OPC_RC) {
+ // // update cr0 flags
+ // ppc_update_cr0(hCPU, hCPU->gpr[rA]);
+ //}
- IMLReg regTmpShiftAmount = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
- IMLReg regTmpCondBool = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1);
- IMLReg regTmp1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2);
- IMLReg regTmp2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 3);
-
- // load masked shift factor into temporary register
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmpShiftAmount, regB, 0x3F);
- ppcImlGenContext->emitInst().make_compare_s32(regTmpShiftAmount, 31, regTmpCondBool, IMLCondition::UNSIGNED_GT);
- ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, true);
-
- PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock,
- [&](ppcImlGenContext_t& genCtx)
- {
- /* branch taken, shift size 32 or above */
- genCtx.emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regA, regS, 31); // shift the sign bit into all the bits
- genCtx.emitInst().make_compare_s32(regA, 0, regCarry, IMLCondition::NEQ);
- },
- [&](ppcImlGenContext_t& genCtx)
- {
- /* branch not taken, shift size below 32 */
- genCtx.emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regTmp1, regS, 31); // signMask = input >> 31 (arithmetic shift)
- genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regTmp2, 1); // shiftMask = ((1<emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regTmp, regS, 31); // signMask = input >> 31 (arithmetic shift)
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regTmp, regTmp, regS); // testValue = input & signMask & ((1<emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmp, regTmp, ((1 << SH) - 1));
- ppcImlGenContext->emitInst().make_compare_s32(regTmp, 0, regCarry, IMLCondition::NEQ); // ca = (testValue != 0)
- // do the actual shift
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regA, regS, (sint32)SH);
- if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
+ uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false);
+ uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ if( opcode&PPC_OPC_RC )
+ PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_SRAW, registerRA, registerRS, (sint32)SH, 0, PPCREC_CR_MODE_LOGICAL);
+ else
+ PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_SRAW, registerRA, registerRS, (sint32)SH);
return true;
}
@@ -1204,12 +1459,17 @@ bool PPCRecompilerImlGen_SLW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode
int rS, rA, rB;
PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
- IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
- IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SLW, regA, regS, regB);
- if ((opcode & PPC_OPC_RC))
- PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
+ uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false);
+ uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ if (opcode & PPC_OPC_RC)
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SLW, registerRA, registerRS, registerRB, 0, PPCREC_CR_MODE_LOGICAL);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SLW, registerRA, registerRS, registerRB, PPC_REC_INVALID_REGISTER, 0);
+ }
return true;
}
@@ -1217,24 +1477,37 @@ bool PPCRecompilerImlGen_SRW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode
{
int rS, rA, rB;
PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
- IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
- IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRW, regA, regS, regB);
+
+ uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false);
+ uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SRW, registerRA, registerRS, registerRB, 0, PPCREC_CR_MODE_LOGICAL);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SRW, registerRA, registerRS, registerRB, PPC_REC_INVALID_REGISTER, 0);
+ }
return true;
}
+
bool PPCRecompilerImlGen_EXTSH(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
int rS, rA, rB;
PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
- IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN_S16_TO_S32, regA, regS);
- if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
+ PPC_ASSERT(rB==0);
+ uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false);
+ uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ if ( opcode&PPC_OPC_RC )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN_S16_TO_S32, registerRA, registerRS, 0, PPCREC_CR_MODE_ARITHMETIC);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN_S16_TO_S32, registerRA, registerRS);
+ }
return true;
}
@@ -1242,11 +1515,16 @@ bool PPCRecompilerImlGen_EXTSB(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
{
sint32 rS, rA, rB;
PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
- IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN_S8_TO_S32, regA, regS);
- if ((opcode & PPC_OPC_RC))
- PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
+ uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false);
+ uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ if ( opcode&PPC_OPC_RC )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN_S8_TO_S32, registerRA, registerRS, 0, PPCREC_CR_MODE_ARITHMETIC);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN_S8_TO_S32, registerRA, registerRS);
+ }
return true;
}
@@ -1254,11 +1532,30 @@ bool PPCRecompilerImlGen_CNTLZW(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
{
sint32 rS, rA, rB;
PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
- IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_CNTLZW, regA, regS);
- if ((opcode & PPC_OPC_RC))
- PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
+ PPC_ASSERT(rB==0);
+ if( opcode&PPC_OPC_RC )
+ {
+ return false;
+ }
+ uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false);
+ uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_CNTLZW, registerRA, registerRS);
+
+ //uint32 n=0;
+ //uint32 x=0x80000000;
+ //uint32 v=hCPU->gpr[rS];
+ //while (!(v & x)) {
+ // n++;
+ // if (n==32) break;
+ // x>>=1;
+ //}
+ //hCPU->gpr[rA] = n;
+ //if (Opcode & PPC_OPC_RC) {
+ // // update cr0 flags
+ // ppc_update_cr0(hCPU, hCPU->gpr[rA]);
+ //}
+
+
return true;
}
@@ -1266,124 +1563,438 @@ bool PPCRecompilerImlGen_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode
{
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NEG, regD, regA);
- if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regD);
+ PPC_ASSERT(rB == 0);
+ //hCPU->gpr[rD] = -((signed int)hCPU->gpr[rA]);
+ //if (Opcode & PPC_OPC_RC) {
+ // // update cr0 flags
+ // ppc_update_cr0(hCPU, hCPU->gpr[rD]);
+ //}
+ uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( opcode&PPC_OPC_RC )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NEG, registerRD, registerRA, 0, PPCREC_CR_MODE_ARITHMETIC);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NEG, registerRD, registerRA);
+ }
return true;
}
-bool PPCRecompilerImlGen_LOAD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool signExtend, bool isBigEndian, bool updateAddrReg)
+void PPCRecompilerImlGen_LWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
int rA, rD;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
- IMLReg regMemAddr;
- if (rA == 0)
+ if( rA == 0 )
{
- if (updateAddrReg)
- return false; // invalid instruction form
- regMemAddr = _GetRegTemporary(ppcImlGenContext, 0);
- ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regMemAddr, 0);
+ // special form where gpr is ignored and only imm is used
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch);
+ return;
}
- else
- {
- if (updateAddrReg && rA == rD)
- return false; // invalid instruction form
- regMemAddr = _GetRegGPR(ppcImlGenContext, rA);
- }
- if (updateAddrReg)
- {
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, regMemAddr, regMemAddr, (sint32)imm);
- imm = 0;
- }
- IMLReg regDst = _GetRegGPR(ppcImlGenContext, rD);
- ppcImlGenContext->emitInst().make_r_memory(regDst, regMemAddr, (sint32)imm, bitWidth, signExtend, isBigEndian);
- return true;
+ // load memory gpr into register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register
+ // load word
+ PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, imm, 32, false, true);
}
-void PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool signExtend, bool isBigEndian, bool updateAddrReg)
+void PPCRecompilerImlGen_LWZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ int rA, rD;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
+ if( rA == 0 )
+ {
+ // special form where gpr is ignored and only imm is used
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch);
+ return;
+ }
+ // load memory gpr into register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // add imm to memory register
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register
+ // load word
+ PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 32, false, true);
+}
+
+void PPCRecompilerImlGen_LHA(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ int rA, rD;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
+ if( rA == 0 )
+ {
+ // special form where gpr is ignored and only imm is used
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch);
+ return;
+ }
+ // load memory gpr into register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new temporary register
+ // load half
+ PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, imm, 16, true, true);
+}
+
+void PPCRecompilerImlGen_LHAU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rA, rD;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
+ if( rA == 0 )
+ {
+ // special form where gpr is ignored and only imm is used
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch);
+ return;
+ }
+ // load memory gpr into register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // add imm to memory register
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new temporary register
+ // load half
+ PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 16, true, true);
+}
+
+void PPCRecompilerImlGen_LHZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rA, rD;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
+ if( rA == 0 )
+ {
+ // special form where gpr is ignored and only imm is used
+ // note: Darksiders 2 has this instruction form but it is never executed.
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch);
+ return;
+ }
+ // load memory gpr into register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new temporary register
+ // load half
+ PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, imm, 16, false, true);
+}
+
+void PPCRecompilerImlGen_LHZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rA, rD;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
+ if( rA == 0 )
+ {
+ // special form where gpr is ignored and only imm is used
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch);
+ return;
+ }
+ // load memory gpr into register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // add imm to memory register
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new temporary register
+ // load half
+ PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 16, false, true);
+}
+
+void PPCRecompilerImlGen_LBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ int rA, rD;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
+ if( rA == 0 )
+ {
+ // special form where gpr is ignored and only imm is used
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch);
+ return;
+ }
+ // load memory gpr into register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register
+ // load byte
+ PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, imm, 8, false, true);
+}
+
+void PPCRecompilerImlGen_LBZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ int rA, rD;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
+ if( rA == 0 )
+ {
+ // special form where gpr is ignored and only imm is used
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch);
+ return;
+ }
+ // load memory gpr into register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // add imm to memory register
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register
+ // load byte
+ PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 8, false, true);
+}
+
+bool PPCRecompilerImlGen_LWZX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
- // if rA == rD, then the EA wont be stored to rA. We could set updateAddrReg to false in such cases but the end result is the same since the loaded value would overwrite rA
sint32 rA, rD, rB;
PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
- updateAddrReg = updateAddrReg && (rA != 0);
- IMLReg regA = rA != 0 ? _GetRegGPR(ppcImlGenContext, rA) : IMLREG_INVALID;
- IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
- IMLReg regDst = _GetRegGPR(ppcImlGenContext, rD);
- if (updateAddrReg)
+ if( rA == 0 )
{
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regA, regA, regB);
- // use single register addressing
- regB = regA;
- regA = IMLREG_INVALID;
+ return false;
}
- if(regA.IsValid())
- PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, regDst, regA, regB, bitWidth, signExtend, isBigEndian);
- else
- ppcImlGenContext->emitInst().make_r_memory(regDst, regB, 0, bitWidth, signExtend, isBigEndian);
-}
-
-bool PPCRecompilerImlGen_STORE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool isBigEndian, bool updateAddrReg)
-{
- int rA, rD;
- uint32 imm;
- PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
- IMLReg regA;
- if (rA != 0)
- {
- regA = _GetRegGPR(ppcImlGenContext, rA);
- }
- else
- {
- if (updateAddrReg)
- return false; // invalid instruction form
- regA = _GetRegTemporary(ppcImlGenContext, 0);
- ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regA, 0);
- }
- IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
- if (updateAddrReg)
- {
- if (rD == rA)
- {
- // make sure to keep source data intact
- regD = _GetRegTemporary(ppcImlGenContext, 0);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regD, regA);
- }
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, regA, regA, (sint32)imm);
- imm = 0;
- }
- ppcImlGenContext->emitInst().make_memory_r(regD, regA, (sint32)imm, bitWidth, isBigEndian);
+ // hCPU->gpr[rD] = memory_readU32((rA?hCPU->gpr[rA]:0)+hCPU->gpr[rB]);
+ // load memory rA and rB into register
+ uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register
+ // load word
+ PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 32, false, true);
return true;
}
-bool PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool isBigEndian, bool updateAddrReg)
+bool PPCRecompilerImlGen_LWZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
- sint32 rA, rS, rB;
- PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
- IMLReg regA = rA != 0 ? _GetRegGPR(ppcImlGenContext, rA) : IMLREG_INVALID;
- IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
- IMLReg regSrc = _GetRegGPR(ppcImlGenContext, rS);
- if (updateAddrReg)
+ sint32 rA, rD, rB;
+ PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
+ if( rA == 0 )
{
- if(rA == 0)
- return false; // invalid instruction form
- if (regSrc == regA)
- {
- // make sure to keep source data intact
- regSrc = _GetRegTemporary(ppcImlGenContext, 0);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regSrc, regA);
- }
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regA, regA, regB);
- // use single register addressing
- regB = regA;
- regA = IMLREG_INVALID;
+ return false;
}
- if (regA.IsInvalid())
- ppcImlGenContext->emitInst().make_memory_r(regSrc, regB, 0, bitWidth, isBigEndian);
+ // load memory rA and rB into register
+ uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register
+ // add rB to rA
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterB);
+ // load word
+ PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterA, 0, 32, false, true);
+ return true;
+}
+
+bool PPCRecompilerImlGen_LWBRX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rA, rD, rB;
+ PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
+ // load memory rA and rB into register
+ uint32 gprRegisterA = 0;
+ if( rA )
+ gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false);
+ uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0 + rD);
+ if (destinationRegister == PPC_REC_INVALID_REGISTER)
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0 + rD); // else just create new register
+ // load word
+ if( rA )
+ PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 32, false, false);
else
- PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, regSrc, regA, regB, bitWidth, false, isBigEndian);
+ PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterB, 0, 32, false, false);
+ return true;
+}
+
+bool PPCRecompilerImlGen_LHAX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rA, rD, rB;
+ PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
+ if( rA == 0 )
+ {
+ // special form (rA == 0) where rA is ignored and only rB is used; emits a debug break instead of a load
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch);
+ return true;
+ }
+ // load memory rA and rB into register
+ uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register
+ // load half word
+ PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 16, true, true);
+ return true;
+}
+
+bool PPCRecompilerImlGen_LHAUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rA, rD, rB;
+ PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
+ if( rA == 0 )
+ {
+ // rA == 0: invalid form for a load-with-update (no base register to update); emits a debug break
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch);
+ return true;
+ }
+ // load memory rA and rB into register
+ uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register
+ // add rB to rA
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterB);
+ // load half word
+ PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterA, 0, 16, true, true);
+ return true;
+}
+
+bool PPCRecompilerImlGen_LHZX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rA, rD, rB;
+ PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
+ if( rA == 0 )
+ {
+ // special form (rA == 0) where rA is ignored and only rB is used; emits a debug break instead of a load
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch);
+ return true;
+ }
+ // load memory rA and rB into register
+ uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register
+ // load half word
+ PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 16, false, true);
+ return true;
+}
+
+bool PPCRecompilerImlGen_LHZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rA, rD, rB;
+ PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
+ if( rA == 0 )
+ {
+ // rA == 0: invalid form for a load-with-update (no base register to update); emits a debug break
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch);
+ return true;
+ }
+ // load memory rA and rB into register
+ uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register
+ // add rB to rA
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterB);
+ // load half word
+ PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterA, 0, 16, false, true);
+ return true;
+}
+
+void PPCRecompilerImlGen_LHBRX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rA, rD, rB;
+ PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
+ // load memory rA and rB into register
+ uint32 gprRegisterA = rA != 0 ? PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false) : 0;
+ uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0 + rD);
+ if (destinationRegister == PPC_REC_INVALID_REGISTER)
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0 + rD); // else just create new register
+ // load half word (little-endian)
+ if (rA == 0)
+ PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterB, 0, 16, false, false);
+ else
+ PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 16, false, false);
+}
+
+bool PPCRecompilerImlGen_LBZX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rA, rD, rB;
+ PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
+ if( rA == 0 )
+ {
+ // special case where rA is ignored and only rB is used
+ return false;
+ }
+ // hCPU->gpr[rD] = memory_readU8((rA?hCPU->gpr[rA]:0)+hCPU->gpr[rB]);
+ // load memory rA and rB into register
+ uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register
+ // load byte
+ PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 8, false, true);
+ return true;
+}
+
+bool PPCRecompilerImlGen_LBZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rA, rD, rB;
+ PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
+ if (rA == 0)
+ {
+ // rA == 0: invalid form for a load-with-update (no base register to update); emits a debug break
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch);
+ return true;
+ }
+ // load memory rA and rB into register
+ uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false);
+ uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0 + rD);
+ if (destinationRegister == PPC_REC_INVALID_REGISTER)
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0 + rD); // else just create new register
+ // add rB to rA
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterB);
+ // load byte
+ PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterA, 0, 8, false, true);
+ return true;
+}
+
+bool PPCRecompilerImlGen_LWARX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rA, rD, rB;
+ PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
+ // load memory rA and rB into register
+ uint32 gprRegisterA = rA != 0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false):0;
+ uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register
+ // load word
+ if( rA != 0 )
+ PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, PPC_REC_LOAD_LWARX_MARKER, false, true);
+ else
+ PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterB, 0, PPC_REC_LOAD_LWARX_MARKER, false, true);
return true;
}
@@ -1392,33 +2003,257 @@ void PPCRecompilerImlGen_LMW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode
sint32 rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
- cemu_assert_debug(rA != 0);
+ //uint32 ea = (rA ? hCPU->gpr[rA] : 0) + imm;
sint32 index = 0;
- while (rD <= 31)
+ while( rD <= 31 )
{
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
+ // load memory gpr into register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register
// load word
- ppcImlGenContext->emitInst().make_r_memory(regD, regA, (sint32)imm + index * 4, 32, false, true);
+ PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, imm+index*4, 32, false, true);
// next
rD++;
index++;
}
}
+void PPCRecompilerImlGen_STW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ int rA, rD;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
+ if( rA == 0 )
+ {
+ // special form where gpr is ignored and only imm is used
+ // note: Darksiders 2 has this instruction form but it is never executed.
+ //PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch);
+ return;
+ }
+ // load memory gpr into register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // load source register
+ uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister
+ // store word
+ PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, imm, 32, true);
+}
+
+void PPCRecompilerImlGen_STWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ int rA, rD;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
+ if( rA == 0 )
+ {
+ // special form where gpr is ignored and only imm is used
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch);
+ return;
+ }
+ // store&update instructions where rD==rA store the register contents without added imm, therefore we need to handle it differently
+ // get memory gpr register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // get source register
+ uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister
+ // add imm to memory register early if possible
+ if( rD != rA )
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
+ // store word
+ PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 32, true);
+ // add imm to memory register late if we couldn't do it early
+ if( rD == rA )
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
+}
+
+void PPCRecompilerImlGen_STH(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ int rA, rD;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
+ if( rA == 0 )
+ {
+ // special form where gpr is ignored and only imm is used
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch);
+ return;
+ }
+ // load memory gpr into register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // load source register
+ uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister
+ // store half
+ PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, imm, 16, true);
+}
+
+void PPCRecompilerImlGen_STHU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ int rA, rD;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
+ if( rA == 0 )
+ {
+ // special form where gpr is ignored and only imm is used
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch);
+ return;
+ }
+ // get memory gpr register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // get source register
+ uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister
+ // add imm to memory register early if possible
+ if( rD != rA )
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
+ // store half
+ PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 16, true);
+ // add imm to memory register late if we couldn't do it early
+ if( rD == rA )
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
+}
+
+void PPCRecompilerImlGen_STB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ int rA, rS;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_SImm(opcode, rS, rA, imm);
+ if( rA == 0 )
+ {
+ // special form where gpr is ignored and only imm is used
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch);
+ return;
+ }
+ // load memory gpr into register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // load source register
+ uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister
+ // store byte
+ PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, imm, 8, true);
+}
+
+void PPCRecompilerImlGen_STBU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ int rA, rD;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
+ if( rA == 0 )
+ {
+ // special form where gpr is ignored and only imm is used
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch);
+ return;
+ }
+ // get memory gpr register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // get source register
+ uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister
+ // add imm to memory register early if possible
+ if( rD != rA )
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
+ // store byte
+ PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 8, true);
+ // add imm to memory register late if we couldn't do it early
+ if( rD == rA )
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
+}
+
+// generic indexed store (STWX, STHX, STBX, STWUX). If byteReversed == true -> STHBRX
+bool PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 storeBitWidth, bool byteReversed = false)
+{
+ sint32 rA, rS, rB;
+ PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
+ // prepare registers
+ uint32 gprRegisterA;
+ if(rA != 0)
+ gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false);
+ uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ // store value (width given by storeBitWidth)
+ if (rA == 0)
+ {
+ PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, destinationRegister, gprRegisterB, 0, storeBitWidth, !byteReversed);
+ }
+ else
+ PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, storeBitWidth, false, !byteReversed);
+ return true;
+}
+
+bool PPCRecompilerImlGen_STORE_INDEXED_UPDATE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 storeBitWidth)
+{
+ sint32 rA, rS, rB;
+ PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
+ if( rA == 0 )
+ {
+ // not supported
+ return false;
+ }
+ if( rS == rA || rS == rB )
+ {
+ // prepare registers
+ uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ // store value (width given by storeBitWidth)
+ PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, storeBitWidth, false, true);
+ // update EA after store
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterB);
+ return true;
+ }
+ // prepare registers
+ uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ uint32 sourceRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ // update EA
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterB);
+ // store value (width given by storeBitWidth)
+ PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegisterA, 0, storeBitWidth, true);
+ return true;
+}
+
+bool PPCRecompilerImlGen_STWCX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rA, rS, rB;
+ PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
+ // prepare registers
+ uint32 gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false):0;
+ uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ // store word
+ if( rA != 0 )
+ PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, PPC_REC_STORE_STWCX_MARKER, false, true);
+ else
+ PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, destinationRegister, gprRegisterB, 0, PPC_REC_STORE_STWCX_MARKER, true);
+ return true;
+}
+
+bool PPCRecompilerImlGen_STWBRX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rA, rS, rB;
+ PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
+ // prepare registers
+ uint32 gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false):0;
+ uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ // store word
+ if( rA != 0 )
+ PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 32, false, false);
+ else
+ PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, destinationRegister, gprRegisterB, 0, 32, false);
+ return true;
+}
+
void PPCRecompilerImlGen_STMW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
sint32 rS, rA;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(opcode, rS, rA, imm);
- cemu_assert_debug(rA != 0);
sint32 index = 0;
while( rS <= 31 )
{
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
+ // load memory gpr into register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // load source register
+ uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister
// store word
- ppcImlGenContext->emitInst().make_memory_r(regS, regA, (sint32)imm + index * 4, 32, true);
+ PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, imm+index*4, 32, true);
// next
rS++;
index++;
@@ -1431,43 +2266,70 @@ bool PPCRecompilerImlGen_LSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
PPC_OPC_TEMPL_X(opcode, rD, rA, nb);
if( nb == 0 )
nb = 32;
-
- if (rA == 0)
+ if( nb == 4 )
{
- cemu_assert_unimplemented(); // special form where gpr is ignored and EA is 0
- return false;
- }
-
- // potential optimization: On x86 unaligned access is allowed and we could handle the case nb==4 with a single memory read, and nb==2 with a memory read and shift
-
- IMLReg memReg = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0);
- uint32 memOffset = 0;
- while (nb > 0)
- {
- if (rD == rA)
- return false;
- cemu_assert(rD < 32);
- IMLReg regDst = _GetRegGPR(ppcImlGenContext, rD);
- // load bytes one-by-one
- for (sint32 b = 0; b < 4; b++)
+ // if nb == 4 this instruction imitates LWZ
+ if( rA == 0 )
{
- ppcImlGenContext->emitInst().make_r_memory(regTmp, memReg, memOffset + b, 8, false, false);
- sint32 shiftAmount = (3 - b) * 8;
- if(shiftAmount)
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, regTmp, regTmp, shiftAmount);
- if(b == 0)
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regDst, regTmp);
- else
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regDst, regDst, regTmp);
- nb--;
- if (nb == 0)
- break;
+#ifdef CEMU_DEBUG_ASSERT
+ assert_dbg(); // special form where gpr is ignored and only imm is used
+#endif
+ return false;
}
- memOffset += 4;
- rD++;
+ // load memory gpr into register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register
+ // load word
+ PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 32, false, true);
+ return true;
}
- return true;
+ else if( nb == 2 )
+ {
+ // if nb == 2 this instruction imitates an LHZ but the result is shifted left by 16 bits
+ if( rA == 0 )
+ {
+#ifdef CEMU_DEBUG_ASSERT
+ assert_dbg(); // special form where gpr is ignored and only imm is used
+#endif
+ return false;
+ }
+ // load memory gpr into register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register
+ // load half
+ PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 16, false, true);
+ // shift
+ PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_SHIFT, destinationRegister, destinationRegister, 16);
+ return true;
+ }
+ else if( nb == 3 )
+ {
+ // if nb == 3 this instruction loads a 3-byte big-endian and the result is shifted left by 8 bits
+ if( rA == 0 )
+ {
+#ifdef CEMU_DEBUG_ASSERT
+ assert_dbg(); // special form where gpr is ignored and only imm is used
+#endif
+ return false;
+ }
+ // load memory gpr into register
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // check if destination register is already loaded
+ uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
+ if( destinationRegister == PPC_REC_INVALID_REGISTER )
+ destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register
+ // load 3 bytes (PPC_REC_STORE_LSWI_3 marker is passed as the bit width)
+ PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, PPC_REC_STORE_LSWI_3, false, true);
+ return true;
+ }
+ debug_printf("PPCRecompilerImlGen_LSWI(): Unsupported nb value %d\n", nb);
+ return false;
}
bool PPCRecompilerImlGen_STSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
@@ -1476,111 +2338,38 @@ bool PPCRecompilerImlGen_STSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
PPC_OPC_TEMPL_X(opcode, rS, rA, nb);
if( nb == 0 )
nb = 32;
-
- IMLReg regMem = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0);
- uint32 memOffset = 0;
- while (nb > 0)
+ if( nb == 4 )
{
- if (rS == rA)
- return false;
- cemu_assert(rS < 32);
- IMLReg regSrc = _GetRegGPR(ppcImlGenContext, rS);
- // store bytes one-by-one
- for (sint32 b = 0; b < 4; b++)
- {
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regTmp, regSrc);
- sint32 shiftAmount = (3 - b) * 8;
- if (shiftAmount)
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, regTmp, regTmp, shiftAmount);
- ppcImlGenContext->emitInst().make_memory_r(regTmp, regMem, memOffset + b, 8, false);
- nb--;
- if (nb == 0)
- break;
- }
- memOffset += 4;
- rS++;
+ // nb == 4: a single whole-word store; load the address register rA
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // load source register
+ uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister
+ // store 32-bit word at offset 0 from rA
+ PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, 0, 32, true);
+ return true;
}
- return true;
-}
-
-bool PPCRecompilerImlGen_LWARX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
-{
- sint32 rA, rD, rB;
- PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
-
- IMLReg regA = rA != 0 ? PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA) : IMLREG_INVALID;
- IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB);
- IMLReg regD = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
- IMLReg regMemResEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_EA);
- IMLReg regMemResVal = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_VAL);
- // calculate EA
- if (regA.IsValid())
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regMemResEA, regA, regB);
- else
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResEA, regB);
- // load word
- ppcImlGenContext->emitInst().make_r_memory(regD, regMemResEA, 0, 32, false, true);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResVal, regD);
- return true;
-}
-
-bool PPCRecompilerImlGen_STWCX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
-{
- sint32 rA, rS, rB;
- PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
- IMLReg regA = rA != 0 ? PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA) : IMLREG_INVALID;
- IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB);
- IMLReg regData = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS);
- IMLReg regTmpDataBE = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2);
- IMLReg regTmpCompareBE = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 3);
- // calculate EA
- IMLReg regCalcEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY);
- if (regA.IsValid())
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regCalcEA, regA, regB);
- else
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regCalcEA, regB);
- // get CR bit regs and set LT, GT and SO immediately
- IMLReg regCrLT = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_LT);
- IMLReg regCrGT = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_GT);
- IMLReg regCrEQ = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_EQ);
- IMLReg regCrSO = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_SO);
- IMLReg regXerSO = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_SO);
- ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrLT, 0);
- ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrGT, 0);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regCrSO, regXerSO);
- // get regs for reservation address and value
- IMLReg regMemResEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_EA);
- IMLReg regMemResVal = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_VAL);
- // compare calculated EA with reservation
- IMLReg regTmpBool = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1);
- ppcImlGenContext->emitInst().make_compare(regCalcEA, regMemResEA, regTmpBool, IMLCondition::EQ);
- ppcImlGenContext->emitInst().make_conditional_jump(regTmpBool, true);
-
- PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock,
- [&](ppcImlGenContext_t& genCtx)
- {
- /* branch taken, EA matching */
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, regTmpDataBE, regData);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, regTmpCompareBE, regMemResVal);
- ppcImlGenContext->emitInst().make_atomic_cmp_store(regMemResEA, regTmpCompareBE, regTmpDataBE, regCrEQ);
- },
- [&](ppcImlGenContext_t& genCtx)
- {
- /* branch not taken, EA mismatching */
- ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrEQ, 0);
- }
- );
-
- // reset reservation
- // I found contradictory information of whether the reservation is cleared in all cases, so unit testing would be required
- // Most sources state that it is cleared on successful store. They don't explicitly mention what happens on failure
- // "The PowerPC 600 series, part 7: Atomic memory access and cache coherency" states that it is always cleared
- // There may also be different behavior between individual PPC architectures
- ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regMemResEA, 0);
- ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regMemResVal, 0);
-
- return true;
+ else if( nb == 2 )
+ {
+ // nb == 2: load the address register rA
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // load source register
+ uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister
+ // store half-word (shifted << 16) using the special PPC_REC_STORE_STSWI_2 size code
+ PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, 0, PPC_REC_STORE_STSWI_2, false);
+ return true;
+ }
+ else if( nb == 3 )
+ {
+ // nb == 3: load the address register rA
+ uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
+ // load source register
+ uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister
+ // store 3-byte-word (shifted << 8) using the special PPC_REC_STORE_STSWI_3 size code
+ PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, 0, PPC_REC_STORE_STSWI_3, false);
+ return true;
+ }
+ debug_printf("PPCRecompilerImlGen_STSWI(): Unsupported nb value %d\n", nb); // only nb == 2, 3 and 4 are translated above
+ return false;
}
bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
@@ -1589,39 +2378,92 @@ bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
rA = (opcode>>16)&0x1F;
rB = (opcode>>11)&0x1F;
// prepare registers
- IMLReg regA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA):IMLREG_INVALID;
- IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
- // load zero into a temporary register
- IMLReg regZero = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
- ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regZero, 0);
- // prepare EA and align it to cacheline
- IMLReg regMemResEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1);
- if(rA != 0)
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regMemResEA, regA, regB);
+ uint32 gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false):0;
+ uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
+ // emit a single PPCREC_IML_OP_DCBZ op with operands (rA, rB); when rA == 0 the rB register is passed for both operands
+ if( rA != 0 )
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_DCBZ, gprRegisterA, gprRegisterB);
else
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResEA, regB);
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regMemResEA, regMemResEA, ~31);
- // zero out the cacheline
- for(sint32 i = 0; i < 32; i += 4)
- ppcImlGenContext->emitInst().make_memory_r(regZero, regMemResEA, i, 32, false);
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_DCBZ, gprRegisterB, gprRegisterB); // NOTE(review): presumably the backend treats equal operands as EA = rB rather than rB + rB — confirm
return true;
}
-bool PPCRecompilerImlGen_OR_NOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool complementResult)
+bool PPCRecompilerImlGen_OR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
int rS, rA, rB;
PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
- IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
- if(rS == rB) // check for MR mnemonic
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regA, regS);
+ // check for MR mnemonic (or rA,rS,rS is a plain register move)
+ if( rS == rB )
+ {
+ // simple register copy
+ if( rA != rS ) // check if no-op
+ {
+ sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ if( opcode&PPC_OPC_RC )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg, 0, PPCREC_CR_MODE_LOGICAL);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg);
+ }
+ }
+ else
+ {
+ if( opcode&PPC_OPC_RC )
+ {
+ // no effect on the register value, but CR is updated via a self-assignment in logical CR mode
+ sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprSourceReg, gprSourceReg, 0, PPCREC_CR_MODE_LOGICAL);
+ }
+ else
+ {
+ // no-op
+ }
+ }
+ }
else
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regA, regS, regB);
- if(complementResult)
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regA, regA);
- if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
+ {
+ // rA = rS | rB
+ sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
+ sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg )
+ {
+ // destination register is also one of the sources: OR the other source into it in place
+ if( gprSource1Reg == gprDestReg )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource1Reg);
+ }
+ if( opcode&PPC_OPC_RC )
+ {
+ // value-preserving AND rA,rA emitted only for its CR update (fixme: merge CR update into OR instruction above)
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL);
+ }
+ }
+ else
+ {
+ // rA = rS
+ if( gprDestReg != gprSource1Reg )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg);
+ }
+ // rA |= rB
+ if( opcode&PPC_OPC_RC )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg, 0, PPCREC_CR_MODE_LOGICAL);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg);
+ }
+ }
+ }
return true;
}
@@ -1629,33 +2471,151 @@ bool PPCRecompilerImlGen_ORC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode
{
sint32 rS, rA, rB;
PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
- // rA = rS | ~rB;
- IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
- IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
- IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regB);
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regA, regS, regTmp);
- if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
+ // rA = rS | ~rB, emitted as one three-operand PPCREC_IML_OP_ORC (interpreter: hCPU->gpr[rA] = hCPU->gpr[rS] | ~hCPU->gpr[rB]); Rc variant passes PPCREC_CR_MODE_LOGICAL
+ sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
+ sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ if( opcode&PPC_OPC_RC )
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ORC, gprDestReg, gprSource1Reg, gprSource2Reg, 0, PPCREC_CR_MODE_LOGICAL);
+ else
+ PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ORC, gprDestReg, gprSource1Reg, gprSource2Reg);
return true;
}
-bool PPCRecompilerImlGen_AND_NAND(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool complementResult)
+bool PPCRecompilerImlGen_NOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
int rS, rA, rB;
PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
- IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
- if (regS == regB)
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regA, regS);
+ // interpreter equivalent: hCPU->gpr[rA] = ~(hCPU->gpr[rS] | hCPU->gpr[rB]);
+ // check for NOT mnemonic (nor rA,rS,rS)
+ if( rS == rB )
+ {
+ // simple register copy with NOT; NOTE(review): Rc here uses PPCREC_CR_MODE_ARITHMETIC while the aliasing path below uses PPCREC_CR_MODE_LOGICAL — confirm which is intended
+ sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ if( gprDestReg != gprSourceReg )
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg);
+ if( opcode&PPC_OPC_RC )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_ARITHMETIC);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg);
+ }
+ }
else
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regA, regS, regB);
- if (complementResult)
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regA, regA);
- if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
+ {
+ // rA = ~(rS | rB)
+ sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
+ sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg )
+ {
+ // destination register is also one of the sources: OR the other source into it in place
+ if( gprSource1Reg == gprDestReg )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource1Reg);
+ }
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg);
+ if( opcode&PPC_OPC_RC )
+ {
+ // value-preserving AND rA,rA emitted only for its CR update (fixme: merge CR update into NOT instruction above)
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL);
+ }
+ }
+ else
+ {
+ // rA = rS
+ if( gprDestReg != gprSource1Reg )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg);
+ }
+ // rA |= rB, then rA = ~rA below
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg);
+ if( opcode&PPC_OPC_RC )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_ARITHMETIC);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg);
+ }
+ }
+ }
+ return true;
+}
+
+bool PPCRecompilerImlGen_AND(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rS, rA, rB;
+ PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
+ // rS == rB: AND of a register with itself is a plain register copy
+ if( rS == rB )
+ {
+ // simple register copy
+ if( rA != rS ) // check if no-op
+ {
+ sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ if( opcode&PPC_OPC_RC )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg, 0, PPCREC_CR_MODE_LOGICAL);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg);
+ }
+ }
+ else
+ {
+ cemu_assert_unimplemented(); // rA == rS == rB: no-op (apart from a possible Rc update, which is not emitted here) -> verify this case
+ }
+ }
+ else
+ {
+ // rA = rS & rB
+ sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
+ sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg )
+ {
+ // destination register is also one of the sources: AND the other source into it in place
+ if( gprSource1Reg == gprDestReg )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprSource2Reg);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprSource1Reg);
+ }
+ if( opcode&PPC_OPC_RC )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL);
+ }
+ }
+ else
+ {
+ // rA = rS
+ if( gprDestReg != gprSource1Reg )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg);
+ }
+ // rA &= rB
+ if( opcode&PPC_OPC_RC )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprSource2Reg, 0, PPCREC_CR_MODE_LOGICAL);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprSource2Reg);
+ }
+ }
+ }
return true;
}
@@ -1663,101 +2623,277 @@ bool PPCRecompilerImlGen_ANDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
{
sint32 rS, rA, rB;
PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
- // rA = rS & ~rB;
- IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
- IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
- IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regB);
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regA, regS, regTmp);
- if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
+ // rA = rS & ~rB (interpreter: hCPU->gpr[rA] = hCPU->gpr[rS] & ~hCPU->gpr[rB]); three cases depending on register aliasing
+ // when Rc is set, the final op of each case is emitted with PPCREC_CR_MODE_LOGICAL
+ if( rS == rB )
+ {
+ // result is always 0 -> replace with XOR rA,rA
+ sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ if( opcode&PPC_OPC_RC )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprDestReg);
+ }
+ }
+ else if( rA == rB )
+ {
+ // rB already in rA, therefore we complement rA first and then AND it with rS
+ sint32 gprRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ sint32 gprRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ // rA = ~rA
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprRA, gprRA);
+ // rA &= rS
+ if( opcode&PPC_OPC_RC )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprRA, gprRS, 0, PPCREC_CR_MODE_LOGICAL);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprRA, gprRS);
+ }
+ }
+ else
+ {
+ // a & (~b) is the same as ~((~a) | b)
+ sint32 gprRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); // destination (rA != rB here): overwritten, like rA in the sibling generators
+ sint32 gprRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
+ sint32 gprRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); // source: its value is read below, so it must be loaded
+ // move rS to rA (if required)
+ if( gprRA != gprRS )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprRA, gprRS);
+ }
+ // rS already in rA, therefore we complement rS first and then OR it with rB
+ // rA = ~rA
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprRA, gprRA);
+ // rA |= rB
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprRA, gprRB);
+ // rA = ~rA
+ if( opcode&PPC_OPC_RC )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprRA, gprRA, 0, PPCREC_CR_MODE_LOGICAL);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprRA, gprRA);
+ }
+ }
return true;
}
-bool PPCRecompilerImlGen_XOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool complementResult)
+void PPCRecompilerImlGen_ANDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rS, rA;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm);
+ // ANDI. always sets cr0 flags, so the AND below is always emitted with PPCREC_CR_MODE_LOGICAL
+ sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ // rA = rS (skipped when both already map to the same IML register)
+ if( gprDestReg != gprSourceReg )
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg);
+ // rA &= imm32
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, gprDestReg, (sint32)imm, 0, false, false, 0, PPCREC_CR_MODE_LOGICAL);
+}
+
+void PPCRecompilerImlGen_ANDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rS, rA;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm);
+ // ANDIS. always sets cr0 flags, so the AND below is always emitted with PPCREC_CR_MODE_LOGICAL
+ sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ // rA = rS (skipped when both already map to the same IML register)
+ if( gprDestReg != gprSourceReg )
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg);
+ // rA &= imm32 (imm as decoded by PPC_OPC_TEMPL_D_Shift16 — presumably already shifted into the upper half; confirm)
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, gprDestReg, (sint32)imm, 0, false, false, 0, PPCREC_CR_MODE_LOGICAL);
+}
+
+bool PPCRecompilerImlGen_XOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
sint32 rS, rA, rB;
PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
if( rS == rB )
{
- ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regA, 0);
+ // xor of a register with itself: the result is 0, emitted as XOR rA,rA on the destination
+ sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ if( opcode&PPC_OPC_RC )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprDestReg);
+ }
}
else
{
- IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
- IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, regA, regS, regB);
+ // rA = rS ^ rB
+ sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
+ sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg )
+ {
+ // destination register is also one of the sources: XOR the other source into it in place
+ if( gprSource1Reg == gprDestReg )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource1Reg);
+ }
+ if( opcode&PPC_OPC_RC )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL);
+ }
+ }
+ else
+ {
+ // rA = rS
+ if( gprDestReg != gprSource1Reg )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg);
+ }
+ // rA ^= rB
+ if( opcode&PPC_OPC_RC )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg, 0, PPCREC_CR_MODE_LOGICAL);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg);
+ }
+ }
}
- if (complementResult)
- ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regA, regA);
- if (opcode & PPC_OPC_RC)
- PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
return true;
}
-void PPCRecompilerImlGen_ANDI_ANDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isShifted)
+
+bool PPCRecompilerImlGen_EQV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
- sint32 rS, rA;
- uint32 imm;
- if (isShifted)
+ sint32 rS, rA, rB;
+ PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
+ if( rS == rB )
{
- PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm);
+ // xor register with itself, then invert (XOR rA,rA followed by NOT rA)
+ sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprDestReg);
+ if( opcode&PPC_OPC_RC )
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL);
+ else
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg);
}
else
{
- PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm);
+ // rA = ~(rS ^ rB)
+ sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
+ sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg )
+ {
+ // destination register is also one of the sources: XOR the other source into it in place
+ if( gprSource1Reg == gprDestReg )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg);
+ }
+ else
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource1Reg);
+ }
+ }
+ else
+ {
+ // rA = rS
+ if( gprDestReg != gprSource1Reg )
+ {
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg);
+ }
+ // rA ^= rB
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg);
+ }
+ if( opcode&PPC_OPC_RC )
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL);
+ else
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg);
}
- IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regA, regS, (sint32)imm);
- // ANDI/ANDIS always updates cr0
- PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
+ return true;
}
-void PPCRecompilerImlGen_ORI_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isShifted)
+void PPCRecompilerImlGen_ORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
sint32 rS, rA;
uint32 imm;
- if (isShifted)
- {
- PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm);
- }
- else
- {
- PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm);
- }
- IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_OR, regA, regS, (sint32)imm);
+ PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm);
+ // ORI does not set cr0 flags
+ // interpreter equivalent: hCPU->gpr[rA] = hCPU->gpr[rS] | imm;
+ sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ // rA = rS (skipped when both already map to the same IML register)
+ if( gprDestReg != gprSourceReg )
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg);
+ // rA |= imm32
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_OR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
}
-void PPCRecompilerImlGen_XORI_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isShifted)
+void PPCRecompilerImlGen_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
sint32 rS, rA;
uint32 imm;
- if (isShifted)
- {
- PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm);
- }
- else
- {
- PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm);
- }
- IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
- IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regA, regS, (sint32)imm);
+ PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm);
+ // ORIS does not set cr0 flags
+ // interpreter equivalent: hCPU->gpr[rA] = hCPU->gpr[rS] | imm; (imm as decoded by PPC_OPC_TEMPL_D_Shift16)
+ sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ // rA = rS (skipped when both already map to the same IML register)
+ if( gprDestReg != gprSourceReg )
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg);
+ // rA |= imm32
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_OR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
+}
+
+void PPCRecompilerImlGen_XORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rS, rA;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm);
+ // interpreter equivalent: hCPU->gpr[rA] = hCPU->gpr[rS] ^ imm;
+ // XORI does not set cr0 flags
+ sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ // rA = rS (skipped when both already map to the same IML register)
+ if( gprDestReg != gprSourceReg )
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg);
+ // rA ^= imm32
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
+}
+
+void PPCRecompilerImlGen_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
+{
+ sint32 rS, rA;
+ uint32 imm;
+ PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm);
+ // interpreter equivalent: hCPU->gpr[rA] = hCPU->gpr[rS] ^ imm; (imm as decoded by PPC_OPC_TEMPL_D_Shift16)
+ // XORIS does not set cr0 flags
+ sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
+ sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
+ // rA = rS (skipped when both already map to the same IML register)
+ if( gprDestReg != gprSourceReg )
+ PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg);
+ // rA ^= imm32
+ PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
}
bool PPCRecompilerImlGen_CROR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
int crD, crA, crB;
PPC_OPC_TEMPL_X(opcode, crD, crA, crB);
- IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA);
- IMLReg regCrB = _GetRegCR(ppcImlGenContext, crB);
- IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD);
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regCrR, regCrA, regCrB);
+ // emit one CR_OR IML instruction using the crD/crA/crB operands decoded from the opcode above
+ PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_OR, crD, crA, crB);
return true;
}
@@ -1765,12 +2901,7 @@ bool PPCRecompilerImlGen_CRORC(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
{
int crD, crA, crB;
PPC_OPC_TEMPL_X(opcode, crD, crA, crB);
- IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA);
- IMLReg regCrB = _GetRegCR(ppcImlGenContext, crB);
- IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD);
- IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY);
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regCrR, regCrA, regTmp);
+ PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_ORC, crD, crA, crB);
return true;
}
@@ -1778,10 +2909,7 @@ bool PPCRecompilerImlGen_CRAND(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
{
int crD, crA, crB;
PPC_OPC_TEMPL_X(opcode, crD, crA, crB);
- IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA);
- IMLReg regCrB = _GetRegCR(ppcImlGenContext, crB);
- IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD);
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regCrR, regCrA, regCrB);
+ PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_AND, crD, crA, crB);
return true;
}
@@ -1789,12 +2917,7 @@ bool PPCRecompilerImlGen_CRANDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
{
int crD, crA, crB;
PPC_OPC_TEMPL_X(opcode, crD, crA, crB);
- IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA);
- IMLReg regCrB = _GetRegCR(ppcImlGenContext, crB);
- IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD);
- IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY);
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regCrR, regCrA, regTmp);
+ PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_ANDC, crD, crA, crB);
return true;
}
@@ -1802,15 +2925,17 @@ bool PPCRecompilerImlGen_CRXOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
{
int crD, crA, crB;
PPC_OPC_TEMPL_X(opcode, crD, crA, crB);
- IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA);
- IMLReg regCrB = _GetRegCR(ppcImlGenContext, crB);
- IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD);
- if (regCrA == regCrB)
+ if (crA == crB)
{
- ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrR, 0);
+ // both operands equal, clear bit in crD
+ // PPC's assert() uses this to pass a parameter to OSPanic
+ PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_CLEAR, crD, 0, 0);
return true;
}
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, regCrR, regCrA, regCrB);
+ else
+ {
+ return false;
+ }
return true;
}
@@ -1818,24 +2943,23 @@ bool PPCRecompilerImlGen_CREQV(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
{
int crD, crA, crB;
PPC_OPC_TEMPL_X(opcode, crD, crA, crB);
- IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA);
- IMLReg regCrB = _GetRegCR(ppcImlGenContext, crB);
- IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD);
- if (regCrA == regCrB)
+ if (crA == crB)
{
- ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrR, 1);
+ // both operands equal, set bit in crD
+ PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_SET, crD, 0, 0);
return true;
}
- IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY);
- ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB
- ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, regCrR, regCrA, regTmp);
+ else
+ {
+ return false;
+ }
return true;
}
bool PPCRecompilerImlGen_HLE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
uint32 hleFuncId = opcode&0xFFFF;
- ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_HLE, ppcImlGenContext->ppcAddressOfCurrentInstruction, hleFuncId, 0, IMLREG_INVALID);
+ // emit the HLE macro with the address of the current PPC instruction and the function id taken from the low 16 bits of the opcode
+ PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_HLE, ppcImlGenContext->ppcAddressOfCurrentInstruction, hleFuncId, 0);
return true;
}
@@ -1846,6 +2970,12 @@ uint32 PPCRecompiler_iterateCurrentInstruction(ppcImlGenContext_t* ppcImlGenCont
return v;
}
+uint32 PPCRecompiler_getInstructionByOffset(ppcImlGenContext_t* ppcImlGenContext, uint32 offset)
+{
+	// offset/4 is used as an element index relative to the current instruction pointer
+	// (presumably a byte offset into 32-bit instruction words - confirm against the type of currentInstruction)
+	return CPU_swapEndianU32(ppcImlGenContext->currentInstruction[offset / 4]);
+}
+
uint32 PPCRecompiler_getCurrentInstruction(ppcImlGenContext_t* ppcImlGenContext)
{
uint32 v = CPU_swapEndianU32(*(ppcImlGenContext->currentInstruction));
@@ -1858,10 +2988,480 @@ uint32 PPCRecompiler_getPreviousInstruction(ppcImlGenContext_t* ppcImlGenContext
return v;
}
-void PPCRecompilerIml_setSegmentPoint(IMLSegmentPoint* segmentPoint, IMLSegment* imlSegment, sint32 index)
+char _tempOpcodename[32];
+
+const char* PPCRecompiler_getOpcodeDebugName(PPCRecImlInstruction_t* iml)
+{
+	// operations that have a dedicated short mnemonic, checked in this order
+	static const struct { uint32 operation; const char* mnemonic; } s_namedOperations[] =
+	{
+		{ PPCREC_IML_OP_ASSIGN, "MOV" },
+		{ PPCREC_IML_OP_ADD, "ADD" },
+		{ PPCREC_IML_OP_SUB, "SUB" },
+		{ PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, "ADDCSC" },
+		{ PPCREC_IML_OP_OR, "OR" },
+		{ PPCREC_IML_OP_AND, "AND" },
+		{ PPCREC_IML_OP_XOR, "XOR" },
+		{ PPCREC_IML_OP_LEFT_SHIFT, "LSH" },
+		{ PPCREC_IML_OP_RIGHT_SHIFT, "RSH" },
+		{ PPCREC_IML_OP_MULTIPLY_SIGNED, "MULS" },
+		{ PPCREC_IML_OP_DIVIDE_SIGNED, "DIVS" },
+	};
+	const uint32 op = iml->operation;
+	for (const auto& entry : s_namedOperations)
+	{
+		if (entry.operation == op)
+			return entry.mnemonic;
+	}
+	// no dedicated mnemonic: format operation and type into the shared scratch buffer
+	// (the returned pointer refers to _tempOpcodename and is overwritten by the next such call)
+	sprintf(_tempOpcodename, "OP0%02x_T%d", iml->operation, iml->type);
+	return _tempOpcodename;
+}
+
+void PPCRecDebug_addRegisterParam(StringBuf& strOutput, sint32 virtualRegister, bool isLast = false)
+{
+	// Appends "t<N>" for a virtual register; values below 10 get an extra space after the number.
+	// Unless isLast is set, a ", " separator follows.
+	const bool isNarrow = (virtualRegister < 10);
+	if (!isLast)
+	{
+		if (isNarrow)
+			strOutput.addFmt("t{} , ", virtualRegister);
+		else
+			strOutput.addFmt("t{}, ", virtualRegister);
+	}
+	else
+	{
+		if (isNarrow)
+			strOutput.addFmt("t{} ", virtualRegister);
+		else
+			strOutput.addFmt("t{}", virtualRegister);
+	}
+}
+
+void PPCRecDebug_addS32Param(StringBuf& strOutput, sint32 val, bool isLast = false)
+{
+	// Appends val as 8-digit hex; a ", " separator follows unless this is the last parameter.
+	if (!isLast)
+	{
+		strOutput.addFmt("0x{:08x}, ", val);
+	}
+	else
+	{
+		strOutput.addFmt("0x{:08x}", val);
+	}
+}
+
+void PPCRecompilerDebug_printLivenessRangeInfo(StringBuf& currentLineText, PPCRecImlSegment_t* imlSegment, sint32 offset)
+{
+	// Prints one liveness column per subrange of the segment for the instruction index 'offset'.
+	// currentLineText is only read for its length: the output is padded with spaces up to column 70
+	// so the liveness columns line up behind the already printed instruction text.
+	for (sint32 column = currentLineText.getLen(); column < 70; column++)
+		debug_printf(" ");
+	// Every subrange occupies a fixed 3 character wide column ("|%-2d" at the start index),
+	// so the "inside" and "outside" markers are padded to 3 characters as well to keep columns aligned.
+	// The '*' marker for dirtied subranges was disabled (if (false)) and is therefore not emitted.
+	for (raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; subrangeItr; subrangeItr = subrangeItr->link_segmentSubrangesGPR.next)
+	{
+		if (offset == subrangeItr->start.index)
+		{
+			// subrange begins at this instruction: print the virtual register id
+			debug_printf("|%-2d", subrangeItr->range->virtualRegister);
+		}
+		else if (offset >= subrangeItr->start.index && offset < subrangeItr->end.index)
+		{
+			// instruction lies inside the subrange
+			debug_printf("|  ");
+		}
+		else
+		{
+			// instruction lies outside the subrange
+			debug_printf("   ");
+		}
+	}
+}
+
+/*
+* Print a human readable dump of one IML segment via debug_printf
+* Output: a segment header, one line per instruction (NOPs without an associated PPC address are skipped),
+* optionally the liveness columns and register range summary (printLivenessRangeInfo), and the segment links
+*/
+void PPCRecompiler_dumpIMLSegment(PPCRecImlSegment_t* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo)
+{
+	StringBuf strOutput(1024);
+
+	strOutput.addFmt("SEGMENT 0x{:04x} 0x{:08x} PPC 0x{:08x} - 0x{:08x} Loop-depth {}", segmentIndex, imlSegment->ppcAddress, imlSegment->ppcAddrMin, imlSegment->ppcAddrMax, imlSegment->loopDepth);
+	if (imlSegment->isEnterable)
+	{
+		strOutput.addFmt(" ENTERABLE (0x{:08x})", imlSegment->enterPPCAddress);
+	}
+	else if( imlSegment->isJumpDestination )
+	{
+		strOutput.addFmt(" JUMP-DEST (0x{:08x})", imlSegment->jumpDestinationPPCAddress);
+	}
+
+	debug_printf("%s\n", strOutput.c_str());
+
+	strOutput.reset();
+	strOutput.addFmt("SEGMENT NAME 0x{:016x}", (uintptr_t)imlSegment);
+	debug_printf("%s", strOutput.c_str());
+
+	if (printLivenessRangeInfo)
+	{
+		PPCRecompilerDebug_printLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START);
+	}
+	debug_printf("\n");
+
+	// column at which the instruction parameters start
+	sint32 lineOffsetParameters = 18;
+
+	for (sint32 i = 0; i < imlSegment->imlListCount; i++)
+	{
+		PPCRecImlInstruction_t& inst = imlSegment->imlList[i];
+		// don't log NOP instructions unless they have an associated PPC address
+		if (inst.type == PPCREC_IML_TYPE_NO_OP && inst.associatedPPCAddress == MPTR_NULL)
+			continue;
+		strOutput.reset();
+		strOutput.addFmt("{:08x} ", inst.associatedPPCAddress);
+		if (inst.type == PPCREC_IML_TYPE_R_NAME || inst.type == PPCREC_IML_TYPE_NAME_R)
+		{
+			if (inst.type == PPCREC_IML_TYPE_R_NAME)
+				strOutput.add("LD_NAME");
+			else
+				strOutput.add("ST_NAME");
+			while ((sint32)strOutput.getLen() < lineOffsetParameters)
+				strOutput.add(" ");
+
+			PPCRecDebug_addRegisterParam(strOutput, inst.op_r_name.registerIndex);
+
+			// the single placeholder must receive the name (the register index was already printed above)
+			strOutput.addFmt("name_{} (", inst.op_r_name.name);
+			if (inst.op_r_name.name >= PPCREC_NAME_R0 && inst.op_r_name.name < (PPCREC_NAME_R0+999))
+			{
+				strOutput.addFmt("r{}", inst.op_r_name.name-PPCREC_NAME_R0);
+			}
+			else if (inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0+999))
+			{
+				strOutput.addFmt("spr{}", inst.op_r_name.name-PPCREC_NAME_SPR0);
+			}
+			else
+				strOutput.add("ukn");
+			strOutput.add(")");
+		}
+		else if (inst.type == PPCREC_IML_TYPE_R_R)
+		{
+			strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(&inst));
+			while ((sint32)strOutput.getLen() < lineOffsetParameters)
+				strOutput.add(" ");
+			PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r.registerResult);
+			PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r.registerA, true);
+
+			if (inst.crRegister != PPC_REC_INVALID_REGISTER)
+			{
+				strOutput.addFmt(" -> CR{}", inst.crRegister);
+			}
+		}
+		else if (inst.type == PPCREC_IML_TYPE_R_R_R)
+		{
+			strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(&inst));
+			while ((sint32)strOutput.getLen() < lineOffsetParameters)
+				strOutput.add(" ");
+			PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r_r.registerResult);
+			PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r_r.registerA);
+			PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r_r.registerB, true);
+			if (inst.crRegister != PPC_REC_INVALID_REGISTER)
+			{
+				strOutput.addFmt(" -> CR{}", inst.crRegister);
+			}
+		}
+		else if (inst.type == PPCREC_IML_TYPE_R_R_S32)
+		{
+			strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(&inst));
+			while ((sint32)strOutput.getLen() < lineOffsetParameters)
+				strOutput.add(" ");
+
+			PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r_s32.registerResult);
+			PPCRecDebug_addRegisterParam(strOutput, inst.op_r_r_s32.registerA);
+			PPCRecDebug_addS32Param(strOutput, inst.op_r_r_s32.immS32, true);
+
+			if (inst.crRegister != PPC_REC_INVALID_REGISTER)
+			{
+				strOutput.addFmt(" -> CR{}", inst.crRegister);
+			}
+		}
+		else if (inst.type == PPCREC_IML_TYPE_R_S32)
+		{
+			strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(&inst));
+			while ((sint32)strOutput.getLen() < lineOffsetParameters)
+				strOutput.add(" ");
+
+			PPCRecDebug_addRegisterParam(strOutput, inst.op_r_immS32.registerIndex);
+			PPCRecDebug_addS32Param(strOutput, inst.op_r_immS32.immS32, true);
+
+			if (inst.crRegister != PPC_REC_INVALID_REGISTER)
+			{
+				strOutput.addFmt(" -> CR{}", inst.crRegister);
+			}
+		}
+		else if (inst.type == PPCREC_IML_TYPE_JUMPMARK)
+		{
+			strOutput.addFmt("jm_{:08x}:", inst.op_jumpmark.address);
+		}
+		else if (inst.type == PPCREC_IML_TYPE_PPC_ENTER)
+		{
+			strOutput.addFmt("ppcEnter_{:08x}:", inst.op_ppcEnter.ppcAddress);
+		}
+		else if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE ||
+			inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED)
+		{
+			if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED)
+				strOutput.add("LD_");
+			else
+				strOutput.add("ST_");
+
+			if (inst.op_storeLoad.flags2.signExtend)
+				strOutput.add("S");
+			else
+				strOutput.add("U");
+			strOutput.addFmt("{}", inst.op_storeLoad.copyWidth);
+
+			while ((sint32)strOutput.getLen() < lineOffsetParameters)
+				strOutput.add(" ");
+
+			PPCRecDebug_addRegisterParam(strOutput, inst.op_storeLoad.registerData);
+
+			if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED)
+				strOutput.addFmt("[t{}+t{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.registerMem2);
+			else
+				strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32);
+		}
+		else if (inst.type == PPCREC_IML_TYPE_MEM2MEM)
+		{
+			strOutput.addFmt("{} [t{}+{}] = [t{}+{}]", inst.op_mem2mem.copyWidth, inst.op_mem2mem.dst.registerMem, inst.op_mem2mem.dst.immS32, inst.op_mem2mem.src.registerMem, inst.op_mem2mem.src.immS32);
+		}
+		else if (inst.type == PPCREC_IML_TYPE_CJUMP)
+		{
+			if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_E)
+				strOutput.add("JE");
+			else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NE)
+				strOutput.add("JNE");
+			else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_G)
+				strOutput.add("JG");
+			else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_GE)
+				strOutput.add("JGE");
+			else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_L)
+				strOutput.add("JL");
+			else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_LE)
+				strOutput.add("JLE");
+			else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE)
+				strOutput.add("JALW"); // jump always
+			else
+				cemu_assert_unimplemented();
+			strOutput.addFmt(" jm_{:08x} (cr{})", inst.op_conditionalJump.jumpmarkAddress, inst.crRegister);
+		}
+		else if (inst.type == PPCREC_IML_TYPE_NO_OP)
+		{
+			strOutput.add("NOP");
+		}
+		else if (inst.type == PPCREC_IML_TYPE_MACRO)
+		{
+			if (inst.operation == PPCREC_IML_MACRO_BLR)
+			{
+				strOutput.addFmt("MACRO BLR 0x{:08x} cycles (depr): {}", inst.op_macro.param, (sint32)inst.op_macro.paramU16);
+			}
+			else if (inst.operation == PPCREC_IML_MACRO_BLRL)
+			{
+				strOutput.addFmt("MACRO BLRL 0x{:08x} cycles (depr): {}", inst.op_macro.param, (sint32)inst.op_macro.paramU16);
+			}
+			else if (inst.operation == PPCREC_IML_MACRO_BCTR)
+			{
+				strOutput.addFmt("MACRO BCTR 0x{:08x} cycles (depr): {}", inst.op_macro.param, (sint32)inst.op_macro.paramU16);
+			}
+			else if (inst.operation == PPCREC_IML_MACRO_BCTRL)
+			{
+				strOutput.addFmt("MACRO BCTRL 0x{:08x} cycles (depr): {}", inst.op_macro.param, (sint32)inst.op_macro.paramU16);
+			}
+			else if (inst.operation == PPCREC_IML_MACRO_BL)
+			{
+				strOutput.addFmt("MACRO BL 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16);
+			}
+			else if (inst.operation == PPCREC_IML_MACRO_B_FAR)
+			{
+				strOutput.addFmt("MACRO B_FAR 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16);
+			}
+			else if (inst.operation == PPCREC_IML_MACRO_LEAVE)
+			{
+				strOutput.addFmt("MACRO LEAVE ppc: 0x{:08x}", inst.op_macro.param);
+			}
+			else if (inst.operation == PPCREC_IML_MACRO_HLE)
+			{
+				strOutput.addFmt("MACRO HLE ppcAddr: 0x{:08x} funcId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2);
+			}
+			else if (inst.operation == PPCREC_IML_MACRO_MFTB)
+			{
+				strOutput.addFmt("MACRO MFTB ppcAddr: 0x{:08x} sprId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2);
+			}
+			else if (inst.operation == PPCREC_IML_MACRO_COUNT_CYCLES)
+			{
+				strOutput.addFmt("MACRO COUNT_CYCLES cycles: {}", inst.op_macro.param);
+			}
+			else
+			{
+				strOutput.addFmt("MACRO ukn operation {}", inst.operation);
+			}
+		}
+		else if (inst.type == PPCREC_IML_TYPE_FPR_R_NAME)
+		{
+			strOutput.addFmt("fpr_t{} = name_{} (", inst.op_r_name.registerIndex, inst.op_r_name.name);
+			if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0+999))
+			{
+				strOutput.addFmt("fpr{}", inst.op_r_name.name-PPCREC_NAME_FPR0);
+			}
+			else if (inst.op_r_name.name >= PPCREC_NAME_TEMPORARY_FPR0 && inst.op_r_name.name < (PPCREC_NAME_TEMPORARY_FPR0+999))
+			{
+				strOutput.addFmt("tempFpr{}", inst.op_r_name.name-PPCREC_NAME_TEMPORARY_FPR0);
+			}
+			else
+				strOutput.add("ukn");
+			strOutput.add(")");
+		}
+		else if (inst.type == PPCREC_IML_TYPE_FPR_NAME_R)
+		{
+			strOutput.addFmt("name_{} (", inst.op_r_name.name);
+			if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0+999))
+			{
+				strOutput.addFmt("fpr{}", inst.op_r_name.name-PPCREC_NAME_FPR0);
+			}
+			else if (inst.op_r_name.name >= PPCREC_NAME_TEMPORARY_FPR0 && inst.op_r_name.name < (PPCREC_NAME_TEMPORARY_FPR0+999))
+			{
+				strOutput.addFmt("tempFpr{}", inst.op_r_name.name-PPCREC_NAME_TEMPORARY_FPR0);
+			}
+			else
+				strOutput.add("ukn");
+			strOutput.addFmt(") = fpr_t{}", inst.op_r_name.registerIndex);
+		}
+		else if (inst.type == PPCREC_IML_TYPE_FPR_LOAD)
+		{
+			strOutput.addFmt("fpr_t{} = ", inst.op_storeLoad.registerData);
+			if (inst.op_storeLoad.flags2.signExtend)
+				strOutput.add("S");
+			else
+				strOutput.add("U");
+			strOutput.addFmt("{} [t{}+{}] mode {}", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32, inst.op_storeLoad.mode);
+			if (inst.op_storeLoad.flags2.notExpanded)
+			{
+				// NOTE(review): only a single space is appended for not-expanded loads; a descriptive tag may have been intended - confirm
+				strOutput.addFmt(" ");
+			}
+		}
+		else if (inst.type == PPCREC_IML_TYPE_FPR_STORE)
+		{
+			if (inst.op_storeLoad.flags2.signExtend)
+				strOutput.add("S");
+			else
+				strOutput.add("U");
+			strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth/8, inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32);
+			strOutput.addFmt("= fpr_t{} mode {}\n", inst.op_storeLoad.registerData, inst.op_storeLoad.mode);
+		}
+		else if (inst.type == PPCREC_IML_TYPE_FPR_R_R)
+		{
+			strOutput.addFmt("{:-6} ", PPCRecompiler_getOpcodeDebugName(&inst));
+			strOutput.addFmt("fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r.registerResult, inst.op_fpr_r_r.registerOperand);
+		}
+		else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R)
+		{
+			strOutput.addFmt("{:-6} ", PPCRecompiler_getOpcodeDebugName(&inst));
+			strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r_r_r.registerResult, inst.op_fpr_r_r_r_r.registerOperandA, inst.op_fpr_r_r_r_r.registerOperandB, inst.op_fpr_r_r_r_r.registerOperandC);
+		}
+		else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R)
+		{
+			strOutput.addFmt("{:-6} ", PPCRecompiler_getOpcodeDebugName(&inst));
+			strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r_r.registerResult, inst.op_fpr_r_r_r.registerOperandA, inst.op_fpr_r_r_r.registerOperandB);
+		}
+		else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
+		{
+			strOutput.addFmt("CYCLE_CHECK jm_{:08x}\n", inst.op_conditionalJump.jumpmarkAddress);
+		}
+		else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
+		{
+			strOutput.addFmt("t{} ", inst.op_conditional_r_s32.registerIndex);
+			bool displayAsHex = false;
+			if (inst.operation == PPCREC_IML_OP_ASSIGN)
+			{
+				displayAsHex = true;
+				strOutput.add("=");
+			}
+			else
+				strOutput.addFmt("(unknown operation CONDITIONAL_R_S32 {})", inst.operation);
+			if (displayAsHex)
+				strOutput.addFmt(" 0x{:x}", inst.op_conditional_r_s32.immS32);
+			else
+				strOutput.addFmt(" {}", inst.op_conditional_r_s32.immS32);
+			strOutput.add(" (conditional)");
+			if (inst.crRegister != PPC_REC_INVALID_REGISTER)
+			{
+				strOutput.addFmt(" -> and update CR{}", inst.crRegister);
+			}
+		}
+		else
+		{
+			strOutput.addFmt("Unknown iml type {}", inst.type);
+		}
+		debug_printf("%s", strOutput.c_str());
+		if (printLivenessRangeInfo)
+		{
+			PPCRecompilerDebug_printLivenessRangeInfo(strOutput, imlSegment, i);
+		}
+		debug_printf("\n");
+	}
+	// all ranges
+	if (printLivenessRangeInfo)
+	{
+		debug_printf("Ranges-VirtReg ");
+		raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
+		while(subrangeItr)
+		{
+			debug_printf("v%-2d", subrangeItr->range->virtualRegister);
+			subrangeItr = subrangeItr->link_segmentSubrangesGPR.next;
+		}
+		debug_printf("\n");
+		debug_printf("Ranges-PhysReg ");
+		subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
+		while (subrangeItr)
+		{
+			debug_printf("p%-2d", subrangeItr->range->physicalRegister);
+			subrangeItr = subrangeItr->link_segmentSubrangesGPR.next;
+		}
+		debug_printf("\n");
+	}
+	// branch info
+	debug_printf("Links from: ");
+	for (sint32 i = 0; i < imlSegment->list_prevSegments.size(); i++)
+	{
+		if (i)
+			debug_printf(", ");
+		debug_printf("%p", (void*)imlSegment->list_prevSegments[i]);
+	}
+	debug_printf("\n");
+	debug_printf("Links to: ");
+	if (imlSegment->nextSegmentBranchNotTaken)
+		debug_printf("%p (no branch), ", (void*)imlSegment->nextSegmentBranchNotTaken);
+	if (imlSegment->nextSegmentBranchTaken)
+		debug_printf("%p (branch)", (void*)imlSegment->nextSegmentBranchTaken);
+	debug_printf("\n");
+}
+
+/*
+* Dump every segment of the IML context in list order, separated by a blank line
+* PPCRecFunction is not used by this function
+*/
+void PPCRecompiler_dumpIML(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext)
+{
+	for (sint32 f = 0; f < ppcImlGenContext->segmentListCount; f++)
+	{
+		PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[f];
+		PPCRecompiler_dumpIMLSegment(imlSegment, f);
+		debug_printf("\n");
+	}
+}
+
+void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint, PPCRecImlSegment_t* imlSegment, sint32 index)
{
segmentPoint->imlSegment = imlSegment;
- segmentPoint->SetInstructionIndex(index);
+ segmentPoint->index = index;
if (imlSegment->segmentPointList)
imlSegment->segmentPointList->prev = segmentPoint;
segmentPoint->prev = nullptr;
@@ -1869,7 +3469,7 @@ void PPCRecompilerIml_setSegmentPoint(IMLSegmentPoint* segmentPoint, IMLSegment*
imlSegment->segmentPointList = segmentPoint;
}
-void PPCRecompilerIml_removeSegmentPoint(IMLSegmentPoint* segmentPoint)
+void PPCRecompilerIml_removeSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint)
{
if (segmentPoint->prev)
segmentPoint->prev->next = segmentPoint->next;
@@ -1881,60 +3481,147 @@ void PPCRecompilerIml_removeSegmentPoint(IMLSegmentPoint* segmentPoint)
/*
* Insert multiple no-op instructions
-* Warning: Can invalidate any previous instruction pointers from the same segment
+* Warning: Can invalidate any previous instruction structs from the same segment
+* The list is grown with realloc when needed, entries from index onward move back by shiftBackCount
+* and segment points at or after index are shifted by the same amount
*/
-void PPCRecompiler_pushBackIMLInstructions(IMLSegment* imlSegment, sint32 index, sint32 shiftBackCount)
+void PPCRecompiler_pushBackIMLInstructions(PPCRecImlSegment_t* imlSegment, sint32 index, sint32 shiftBackCount)
{
- cemu_assert_debug(index >= 0 && index <= imlSegment->imlList.size());
-
- imlSegment->imlList.insert(imlSegment->imlList.begin() + index, shiftBackCount, {});
-
- memset(imlSegment->imlList.data() + index, 0, sizeof(IMLInstruction) * shiftBackCount);
+ cemu_assert(index >= 0 && index <= imlSegment->imlListCount);
+ if (imlSegment->imlListCount + shiftBackCount > imlSegment->imlListSize)
+ {
+ sint32 newSize = imlSegment->imlListCount + shiftBackCount + std::max(2, imlSegment->imlListSize/2);
+ imlSegment->imlList = (PPCRecImlInstruction_t*)realloc(imlSegment->imlList, sizeof(PPCRecImlInstruction_t)*newSize);
+ imlSegment->imlListSize = newSize;
+ }
+ // move the tail [index, imlListCount) back in one step; memmove copes with the overlapping source and destination
+ sint32 tailCount = (sint32)imlSegment->imlListCount - index;
+ if (tailCount > 0)
+ {
+ memmove(imlSegment->imlList + (index + shiftBackCount), imlSegment->imlList + index, sizeof(PPCRecImlInstruction_t) * tailCount);
+ }
// fill empty space with NOP instructions
for (sint32 i = 0; i < shiftBackCount; i++)
{
imlSegment->imlList[index + i].type = PPCREC_IML_TYPE_NONE;
}
+ imlSegment->imlListCount += shiftBackCount;
- // update position of segment points
if (imlSegment->segmentPointList)
{
- IMLSegmentPoint* segmentPoint = imlSegment->segmentPointList;
+ ppcRecompilerSegmentPoint_t* segmentPoint = imlSegment->segmentPointList;
while (segmentPoint)
{
- segmentPoint->ShiftIfAfter(index, shiftBackCount);
+ // the special RA_INTER_RANGE_START / RA_INTER_RANGE_END markers are not instruction indices and stay untouched
+ if (segmentPoint->index != RA_INTER_RANGE_START && segmentPoint->index != RA_INTER_RANGE_END)
+ {
+ if (segmentPoint->index >= index)
+ segmentPoint->index += shiftBackCount;
+ }
+ // next
segmentPoint = segmentPoint->next;
}
}
}
-IMLInstruction* PPCRecompiler_insertInstruction(IMLSegment* imlSegment, sint32 index)
+/*
+* Insert and return new instruction at index
+* Makes room for one entry via PPCRecompiler_pushBackIMLInstructions and returns a pointer to that slot
+* Warning: Can invalidate any previous instruction structs from the same segment
+*/
+PPCRecImlInstruction_t* PPCRecompiler_insertInstruction(PPCRecImlSegment_t* imlSegment, sint32 index)
{
PPCRecompiler_pushBackIMLInstructions(imlSegment, index, 1);
- return imlSegment->imlList.data() + index;
+ return imlSegment->imlList + index;
}
-IMLInstruction* PPCRecompiler_appendInstruction(IMLSegment* imlSegment)
+/*
+* Append and return new instruction at the end of the segment
+* The returned instruction is zero-initialized
+* Warning: Can invalidate any previous instruction structs from the same segment
+*/
+PPCRecImlInstruction_t* PPCRecompiler_appendInstruction(PPCRecImlSegment_t* imlSegment)
{
- size_t index = imlSegment->imlList.size();
- imlSegment->imlList.emplace_back();
- memset(imlSegment->imlList.data() + index, 0, sizeof(IMLInstruction));
- return imlSegment->imlList.data() + index;
-}
-
-IMLSegment* PPCRecompilerIml_appendSegment(ppcImlGenContext_t* ppcImlGenContext)
-{
- IMLSegment* segment = new IMLSegment();
- ppcImlGenContext->segmentList2.emplace_back(segment);
- return segment;
+ sint32 index = imlSegment->imlListCount;
+ if (index >= imlSegment->imlListSize)
+ {
+ // grow with headroom (same policy as PPCRecompiler_pushBackIMLInstructions) instead of one slot at a time,
+ // so that a run of appends does not realloc on every call
+ sint32 newSize = index + 1 + std::max(2, imlSegment->imlListSize/2);
+ imlSegment->imlList = (PPCRecImlInstruction_t*)realloc(imlSegment->imlList, sizeof(PPCRecImlInstruction_t)*newSize);
+ imlSegment->imlListSize = newSize;
+ }
+ imlSegment->imlListCount++;
+ memset(imlSegment->imlList + index, 0, sizeof(PPCRecImlInstruction_t));
+ return imlSegment->imlList + index;
}
void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint32 index, sint32 count)
{
- ppcImlGenContext->segmentList2.insert(ppcImlGenContext->segmentList2.begin() + index, count, nullptr);
- for (sint32 i = 0; i < count; i++)
- ppcImlGenContext->segmentList2[index + i] = new IMLSegment();
+ // Inserts count newly created segments at position index of the context's segment list
+ if( (ppcImlGenContext->segmentListCount+count) > ppcImlGenContext->segmentListSize )
+ {
+ // allocate space for more segments
+ ppcImlGenContext->segmentListSize += count;
+ ppcImlGenContext->segmentList = (PPCRecImlSegment_t**)realloc(ppcImlGenContext->segmentList, ppcImlGenContext->segmentListSize*sizeof(PPCRecImlSegment_t*));
+ }
+ // shift the existing segment pointers from index onward back by count, starting at the end
+ for(sint32 i=(sint32)ppcImlGenContext->segmentListCount-1; i>=index; i--)
+ {
+ memcpy(ppcImlGenContext->segmentList+(i+count), ppcImlGenContext->segmentList+i, sizeof(PPCRecImlSegment_t*));
+ }
+ ppcImlGenContext->segmentListCount += count;
+ for(sint32 i=0; i<count; i++)
+ {
+ // create the segment with new: it contains a std::vector (list_prevSegments) that must be constructed,
+ // and PPCRecompiler_freeContext releases segments with delete
+ ppcImlGenContext->segmentList[index+i] = new PPCRecImlSegment_t();
+ }
+}
+
+/*
+ * Allocate and init a new iml instruction segment
+ * The segment is appended to the end of the context's segment list and returned
+ */
+PPCRecImlSegment_t* PPCRecompiler_generateImlSegment(ppcImlGenContext_t* ppcImlGenContext)
+{
+	if( ppcImlGenContext->segmentListCount >= ppcImlGenContext->segmentListSize )
+	{
+		// allocate space for more segments
+		// doubling a capacity of zero would leave no room for the new entry, so start from a small fixed capacity in that case
+		if (ppcImlGenContext->segmentListSize > 0)
+			ppcImlGenContext->segmentListSize *= 2;
+		else
+			ppcImlGenContext->segmentListSize = 4;
+		ppcImlGenContext->segmentList = (PPCRecImlSegment_t**)realloc(ppcImlGenContext->segmentList, ppcImlGenContext->segmentListSize*sizeof(PPCRecImlSegment_t*));
+	}
+	PPCRecImlSegment_t* ppcRecSegment = new PPCRecImlSegment_t();
+	ppcImlGenContext->segmentList[ppcImlGenContext->segmentListCount] = ppcRecSegment;
+	ppcImlGenContext->segmentListCount++;
+	return ppcRecSegment;
+}
+
+/*
+* Release everything owned by the IML context: the context-level instruction list,
+* every segment (its instruction list and the segment itself) and the segment pointer list
+* The freed list pointers are reset to nullptr and segmentListCount to 0
+*/
+void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext)
+{
+	if (ppcImlGenContext->imlList)
+	{
+		free(ppcImlGenContext->imlList);
+		ppcImlGenContext->imlList = nullptr;
+	}
+	for (sint32 i = 0; i < ppcImlGenContext->segmentListCount; i++)
+	{
+		// the instruction list is malloc/realloc memory, the segment itself is a C++ object
+		free(ppcImlGenContext->segmentList[i]->imlList);
+		delete ppcImlGenContext->segmentList[i];
+	}
+	ppcImlGenContext->segmentListCount = 0;
+	if (ppcImlGenContext->segmentList)
+	{
+		free(ppcImlGenContext->segmentList);
+		ppcImlGenContext->segmentList = nullptr;
+	}
+}
+
+bool PPCRecompiler_isSuffixInstruction(PPCRecImlInstruction_t* iml)
+{
+	// Returns true for the instruction kinds this function classifies as suffix instructions:
+	// a fixed set of macro operations plus PPC_ENTER, CJUMP and CJUMP_CYCLE_CHECK.
+	if (iml->type == PPCREC_IML_TYPE_MACRO)
+	{
+		const auto macroOp = iml->operation;
+		return macroOp == PPCREC_IML_MACRO_BLR ||
+			macroOp == PPCREC_IML_MACRO_BCTR ||
+			macroOp == PPCREC_IML_MACRO_BL ||
+			macroOp == PPCREC_IML_MACRO_B_FAR ||
+			macroOp == PPCREC_IML_MACRO_BLRL ||
+			macroOp == PPCREC_IML_MACRO_BCTRL ||
+			macroOp == PPCREC_IML_MACRO_LEAVE ||
+			macroOp == PPCREC_IML_MACRO_HLE ||
+			macroOp == PPCREC_IML_MACRO_MFTB;
+	}
+	return iml->type == PPCREC_IML_TYPE_PPC_ENTER ||
+		iml->type == PPCREC_IML_TYPE_CJUMP ||
+		iml->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK;
}
bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
@@ -1956,18 +3643,15 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
switch (PPC_getBits(opcode, 25, 5))
{
case 0:
- if( !PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext, opcode) )
- unsupportedInstructionFound = true;
+ PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext, opcode);
ppcImlGenContext->hasFPUInstruction = true;
break;
case 1:
- if( !PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext, opcode) )
- unsupportedInstructionFound = true;
+ PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext, opcode);
ppcImlGenContext->hasFPUInstruction = true;
break;
case 2:
- if( !PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext, opcode) )
- unsupportedInstructionFound = true;
+ PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext, opcode);
ppcImlGenContext->hasFPUInstruction = true;
break;
default:
@@ -2008,23 +3692,23 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
- case 12: // PS_MULS0
- if (PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext, opcode, false) == false)
+ case 12: // multiply scalar
+ if (PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
- case 13: // PS_MULS1
- if (PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext, opcode, true) == false)
+ case 13: // multiply scalar
+ if (PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
- case 14: // PS_MADDS0
- if (PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext, opcode, false) == false)
+ case 14: // multiply add scalar
+ if (PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
- case 15: // PS_MADDS1
- if (PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext, opcode, true) == false)
+ case 15: // multiply add scalar
+ if (PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
@@ -2091,22 +3775,22 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
- case 28: // PS_MSUB
- if (PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext, opcode, false) == false)
+ case 28: // multiply sub paired
+ if (PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
- case 29: // PS_MADD
+ case 29: // multiply add paired
if (PPCRecompilerImlGen_PS_MADD(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
- case 30: // PS_NMSUB
- if (PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext, opcode, true) == false)
+ case 30: // negative multiply sub paired
+ if (PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
- case 31: // PS_NMADD
+ case 31: // negative multiply add paired
if (PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
@@ -2120,23 +3804,20 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
PPCRecompilerImlGen_MULLI(ppcImlGenContext, opcode);
break;
case 8: // SUBFIC
- if (!PPCRecompilerImlGen_SUBFIC(ppcImlGenContext, opcode))
- unsupportedInstructionFound = true;
+ PPCRecompilerImlGen_SUBFIC(ppcImlGenContext, opcode);
break;
case 10: // CMPLI
- if (!PPCRecompilerImlGen_CMPI(ppcImlGenContext, opcode, true))
- unsupportedInstructionFound = true;
+ PPCRecompilerImlGen_CMPLI(ppcImlGenContext, opcode);
break;
case 11: // CMPI
- if (!PPCRecompilerImlGen_CMPI(ppcImlGenContext, opcode, false))
- unsupportedInstructionFound = true;
+ PPCRecompilerImlGen_CMPI(ppcImlGenContext, opcode);
break;
case 12: // ADDIC
- if (PPCRecompilerImlGen_ADDIC_(ppcImlGenContext, opcode, false) == false)
+ if (PPCRecompilerImlGen_ADDIC(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
case 13: // ADDIC.
- if (PPCRecompilerImlGen_ADDIC_(ppcImlGenContext, opcode, true) == false)
+ if (PPCRecompilerImlGen_ADDIC_(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
case 14: // ADDI
@@ -2168,11 +3849,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
case 19: // opcode category 19
switch (PPC_getBits(opcode, 30, 10))
{
- case 0:
- PPCRecompilerImlGen_MCRF(ppcImlGenContext, opcode);
- break;
- case 16: // BCLR
- if (PPCRecompilerImlGen_BCSPR(ppcImlGenContext, opcode, SPR_LR) == false)
+ case 16:
+ if (PPCRecompilerImlGen_BCLR(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
case 129:
@@ -2203,8 +3881,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
if (PPCRecompilerImlGen_CROR(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 528: // BCCTR
- if (PPCRecompilerImlGen_BCSPR(ppcImlGenContext, opcode, SPR_CTR) == false)
+ case 528:
+ if (PPCRecompilerImlGen_BCCTR(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
default:
@@ -2224,34 +3902,37 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
if (PPCRecompilerImlGen_RLWNM(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 24: // ORI
- PPCRecompilerImlGen_ORI_ORIS(ppcImlGenContext, opcode, false);
+ case 24:
+ PPCRecompilerImlGen_ORI(ppcImlGenContext, opcode);
break;
- case 25: // ORIS
- PPCRecompilerImlGen_ORI_ORIS(ppcImlGenContext, opcode, true);
+ case 25:
+ PPCRecompilerImlGen_ORIS(ppcImlGenContext, opcode);
break;
- case 26: // XORI
- PPCRecompilerImlGen_XORI_XORIS(ppcImlGenContext, opcode, false);
+ case 26:
+ PPCRecompilerImlGen_XORI(ppcImlGenContext, opcode);
break;
- case 27: // XORIS
- PPCRecompilerImlGen_XORI_XORIS(ppcImlGenContext, opcode, true);
+ case 27:
+ PPCRecompilerImlGen_XORIS(ppcImlGenContext, opcode);
break;
- case 28: // ANDI
- PPCRecompilerImlGen_ANDI_ANDIS(ppcImlGenContext, opcode, false);
+ case 28:
+ PPCRecompilerImlGen_ANDI(ppcImlGenContext, opcode);
break;
- case 29: // ANDIS
- PPCRecompilerImlGen_ANDI_ANDIS(ppcImlGenContext, opcode, true);
+ case 29:
+ PPCRecompilerImlGen_ANDIS(ppcImlGenContext, opcode);
break;
case 31: // opcode category
switch (PPC_getBits(opcode, 30, 10))
{
case 0:
- PPCRecompilerImlGen_CMP(ppcImlGenContext, opcode, false);
+ PPCRecompilerImlGen_CMP(ppcImlGenContext, opcode);
break;
case 4:
PPCRecompilerImlGen_TW(ppcImlGenContext, opcode);
break;
case 8:
+ // todo: Check if we can optimize this pattern:
+ // SUBFC + SUBFE
+ // SUBFC
if (PPCRecompilerImlGen_SUBFC(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
@@ -2271,8 +3952,9 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
if (PPCRecompilerImlGen_LWARX(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 23: // LWZX
- PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, true, false);
+ case 23:
+ if (PPCRecompilerImlGen_LWZX(ppcImlGenContext, opcode) == false)
+ unsupportedInstructionFound = true;
break;
case 24:
if (PPCRecompilerImlGen_SLW(ppcImlGenContext, opcode) == false)
@@ -2282,12 +3964,12 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
if (PPCRecompilerImlGen_CNTLZW(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 28: // AND
- if (!PPCRecompilerImlGen_AND_NAND(ppcImlGenContext, opcode, false))
+ case 28:
+ if (PPCRecompilerImlGen_AND(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
case 32:
- PPCRecompilerImlGen_CMP(ppcImlGenContext, opcode, true); // CMPL
+ PPCRecompilerImlGen_CMPL(ppcImlGenContext, opcode);
break;
case 40:
if (PPCRecompilerImlGen_SUBF(ppcImlGenContext, opcode) == false)
@@ -2296,11 +3978,12 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
case 54:
// DBCST - Generates no code
break;
- case 55: // LWZUX
- PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, true, true);
+ case 55:
+ if (PPCRecompilerImlGen_LWZUX(ppcImlGenContext, opcode) == false)
+ unsupportedInstructionFound = true;
break;
- case 60: // ANDC
- if (!PPCRecompilerImlGen_ANDC(ppcImlGenContext, opcode))
+ case 60:
+ if (PPCRecompilerImlGen_ANDC(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
case 75:
@@ -2310,18 +3993,20 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
case 86:
// DCBF -> No-Op
break;
- case 87: // LBZX
- PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 8, false, true, false);
+ case 87:
+ if (PPCRecompilerImlGen_LBZX(ppcImlGenContext, opcode) == false)
+ unsupportedInstructionFound = true;
break;
case 104:
if (PPCRecompilerImlGen_NEG(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 119: // LBZUX
- PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 8, false, true, true);
+ case 119:
+ if (PPCRecompilerImlGen_LBZUX(ppcImlGenContext, opcode) == false)
+ unsupportedInstructionFound = true;
break;
- case 124: // NOR
- if (!PPCRecompilerImlGen_OR_NOR(ppcImlGenContext, opcode, true))
+ case 124:
+ if (PPCRecompilerImlGen_NOR(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
case 136:
@@ -2333,20 +4018,19 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
unsupportedInstructionFound = true;
break;
case 144:
- if( !PPCRecompilerImlGen_MTCRF(ppcImlGenContext, opcode))
- unsupportedInstructionFound = true;
+ PPCRecompilerImlGen_MTCRF(ppcImlGenContext, opcode);
break;
case 150:
- if (!PPCRecompilerImlGen_STWCX(ppcImlGenContext, opcode))
+ if (PPCRecompilerImlGen_STWCX(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 151: // STWX
- if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 32, true, false))
+ case 151:
+ if (PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 32) == false)
unsupportedInstructionFound = true;
break;
- case 183: // STWUX
- if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 32, true, true))
- unsupportedInstructionFound = true;
+ case 183:
+ if (PPCRecompilerImlGen_STORE_INDEXED_UPDATE(ppcImlGenContext, opcode, 32) == false)
+ unsupportedInstructionFound = true;
break;
case 200:
if (PPCRecompilerImlGen_SUBFZE(ppcImlGenContext, opcode) == false)
@@ -2356,8 +4040,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
if (PPCRecompilerImlGen_ADDZE(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 215: // STBX
- if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 8, true, false))
+ case 215:
+ if (PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 8) == false)
unsupportedInstructionFound = true;
break;
case 234:
@@ -2368,56 +4052,59 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
if (PPCRecompilerImlGen_MULLW(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 247: // STBUX
- if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 8, true, true))
+ case 247:
+ if (PPCRecompilerImlGen_STORE_INDEXED_UPDATE(ppcImlGenContext, opcode, 8) == false)
unsupportedInstructionFound = true;
break;
case 266:
if (PPCRecompilerImlGen_ADD(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 279: // LHZX
- PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, false);
- break;
- case 284: // EQV (alias to NXOR)
- if (!PPCRecompilerImlGen_XOR(ppcImlGenContext, opcode, true))
+ case 279:
+ if (PPCRecompilerImlGen_LHZX(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 311: // LHZUX
- PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, true);
+ case 284:
+ PPCRecompilerImlGen_EQV(ppcImlGenContext, opcode);
break;
- case 316: // XOR
- if (!PPCRecompilerImlGen_XOR(ppcImlGenContext, opcode, false))
+ case 311:
+ if (PPCRecompilerImlGen_LHZUX(ppcImlGenContext, opcode) == false)
+ unsupportedInstructionFound = true;
+ break;
+ case 316:
+ if (PPCRecompilerImlGen_XOR(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
case 339:
if (PPCRecompilerImlGen_MFSPR(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 343: // LHAX
- PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, true, true, false);
+ case 343:
+ if (PPCRecompilerImlGen_LHAX(ppcImlGenContext, opcode) == false)
+ unsupportedInstructionFound = true;
break;
case 371:
if (PPCRecompilerImlGen_MFTB(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 375: // LHAUX
- PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, true, true, true);
+ case 375:
+ if (PPCRecompilerImlGen_LHAUX(ppcImlGenContext, opcode) == false)
+ unsupportedInstructionFound = true;
break;
- case 407: // STHX
- if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16, true, false))
+ case 407:
+ if (PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16) == false)
unsupportedInstructionFound = true;
break;
case 412:
if (PPCRecompilerImlGen_ORC(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 439: // STHUX
- if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16, true, true))
+ case 439:
+ if (PPCRecompilerImlGen_STORE_INDEXED_UPDATE(ppcImlGenContext, opcode, 16) == false)
unsupportedInstructionFound = true;
break;
- case 444: // OR
- if (!PPCRecompilerImlGen_OR_NOR(ppcImlGenContext, opcode, false))
+ case 444:
+ if (PPCRecompilerImlGen_OR(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
case 459:
@@ -2427,19 +4114,17 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
if (PPCRecompilerImlGen_MTSPR(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 476: // NAND
- if (!PPCRecompilerImlGen_AND_NAND(ppcImlGenContext, opcode, true))
- unsupportedInstructionFound = true;
- break;
case 491:
if (PPCRecompilerImlGen_DIVW(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 534: // LWBRX
- PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, false, false);
+ case 534:
+ if (PPCRecompilerImlGen_LWBRX(ppcImlGenContext, opcode) == false)
+ unsupportedInstructionFound = true;
+ ppcImlGenContext->hasFPUInstruction = true;
break;
- case 535: // LFSX
- if (PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext, opcode, false, false) == false)
+ case 535:
+ if (PPCRecompilerImlGen_LFSX(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
@@ -2447,8 +4132,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
if (PPCRecompilerImlGen_SRW(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 567: // LFSUX
- if (PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext, opcode, true, false) == false)
+ case 567:
+ if (PPCRecompilerImlGen_LFSUX(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
@@ -2459,42 +4144,38 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
case 598:
PPCRecompilerImlGen_SYNC(ppcImlGenContext, opcode);
break;
- case 599: // LFDX
- if (PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext, opcode, false, true) == false)
+ case 599:
+ if (PPCRecompilerImlGen_LFDX(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
- case 631: // LFDUX
- if (PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext, opcode, true, true) == false)
+ case 631:
+ if (PPCRecompilerImlGen_LFDUX(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
- case 662: // STWBRX
- if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 32, false, false))
+ case 662:
+ if (PPCRecompilerImlGen_STWBRX(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 663: // STFSX
- if (PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext, opcode, false, false) == false)
+ case 663:
+ if (PPCRecompilerImlGen_STFSX(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 695: // STFSUX
- if (PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext, opcode, true, false) == false)
+ case 695:
+ if (PPCRecompilerImlGen_STFSUX(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
case 725:
if (PPCRecompilerImlGen_STSWI(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 727: // STFDX
- if (PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext, opcode, false, true) == false)
+ case 727:
+ if (PPCRecompilerImlGen_STFDX(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
break;
- case 759: // STFDUX
- if (PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext, opcode, true, true) == false)
- unsupportedInstructionFound = true;
- break;
- case 790: // LHBRX
- PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, false, false);
+ case 790:
+ PPCRecompilerImlGen_LHBRX(ppcImlGenContext, opcode);
break;
case 792:
if (PPCRecompilerImlGen_SRAW(ppcImlGenContext, opcode) == false)
@@ -2505,7 +4186,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
unsupportedInstructionFound = true;
break;
case 918: // STHBRX
- if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16, false, true))
+ if (PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16, true) == false)
unsupportedInstructionFound = true;
break;
case 922:
@@ -2529,61 +4210,47 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
break;
}
break;
- case 32: // LWZ
- if(!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 32, false, true, false))
- unsupportedInstructionFound = true;
+ case 32:
+ PPCRecompilerImlGen_LWZ(ppcImlGenContext, opcode);
break;
- case 33: // LWZU
- if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 32, false, true, true))
- unsupportedInstructionFound = true;
+ case 33:
+ PPCRecompilerImlGen_LWZU(ppcImlGenContext, opcode);
break;
- case 34: // LBZ
- if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 8, false, true, false))
- unsupportedInstructionFound = true;
+ case 34:
+ PPCRecompilerImlGen_LBZ(ppcImlGenContext, opcode);
break;
- case 35: // LBZU
- if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 8, false, true, true))
- unsupportedInstructionFound = true;
+ case 35:
+ PPCRecompilerImlGen_LBZU(ppcImlGenContext, opcode);
break;
- case 36: // STW
- if(!PPCRecompilerImlGen_STORE(ppcImlGenContext, opcode, 32, true, false))
- unsupportedInstructionFound = true;
+ case 36:
+ PPCRecompilerImlGen_STW(ppcImlGenContext, opcode);
break;
- case 37: // STWU
- if (!PPCRecompilerImlGen_STORE(ppcImlGenContext, opcode, 32, true, true))
- unsupportedInstructionFound = true;
+ case 37:
+ PPCRecompilerImlGen_STWU(ppcImlGenContext, opcode);
break;
- case 38: // STB
- if (!PPCRecompilerImlGen_STORE(ppcImlGenContext, opcode, 8, true, false))
- unsupportedInstructionFound = true;
+ case 38:
+ PPCRecompilerImlGen_STB(ppcImlGenContext, opcode);
break;
- case 39: // STBU
- if (!PPCRecompilerImlGen_STORE(ppcImlGenContext, opcode, 8, true, true))
- unsupportedInstructionFound = true;
+ case 39:
+ PPCRecompilerImlGen_STBU(ppcImlGenContext, opcode);
break;
- case 40: // LHZ
- if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 16, false, true, false))
- unsupportedInstructionFound = true;
+ case 40:
+ PPCRecompilerImlGen_LHZ(ppcImlGenContext, opcode);
break;
- case 41: // LHZU
- if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 16, false, true, true))
- unsupportedInstructionFound = true;
+ case 41:
+ PPCRecompilerImlGen_LHZU(ppcImlGenContext, opcode);
break;
- case 42: // LHA
- if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 16, true, true, false))
- unsupportedInstructionFound = true;
+ case 42:
+ PPCRecompilerImlGen_LHA(ppcImlGenContext, opcode);
break;
- case 43: // LHAU
- if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 16, true, true, true))
- unsupportedInstructionFound = true;
+ case 43:
+ PPCRecompilerImlGen_LHAU(ppcImlGenContext, opcode);
break;
- case 44: // STH
- if (!PPCRecompilerImlGen_STORE(ppcImlGenContext, opcode, 16, true, false))
- unsupportedInstructionFound = true;
+ case 44:
+ PPCRecompilerImlGen_STH(ppcImlGenContext, opcode);
break;
- case 45: // STHU
- if (!PPCRecompilerImlGen_STORE(ppcImlGenContext, opcode, 16, true, true))
- unsupportedInstructionFound = true;
+ case 45:
+ PPCRecompilerImlGen_STHU(ppcImlGenContext, opcode);
break;
case 46:
PPCRecompilerImlGen_LMW(ppcImlGenContext, opcode);
@@ -2591,53 +4258,53 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
case 47:
PPCRecompilerImlGen_STMW(ppcImlGenContext, opcode);
break;
- case 48: // LFS
- if (PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext, opcode, false, false) == false)
+ case 48:
+ if (PPCRecompilerImlGen_LFS(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
- case 49: // LFSU
- if (PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext, opcode, true, false) == false)
+ case 49:
+ if (PPCRecompilerImlGen_LFSU(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
- case 50: // LFD
- if (PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext, opcode, false, true) == false)
+ case 50:
+ if (PPCRecompilerImlGen_LFD(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
- case 51: // LFDU
- if (PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext, opcode, true, true) == false)
+ case 51:
+ if (PPCRecompilerImlGen_LFDU(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
- case 52: // STFS
- if (PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext, opcode, false, false) == false)
+ case 52:
+ if (PPCRecompilerImlGen_STFS(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
- case 53: // STFSU
- if (PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext, opcode, true, false) == false)
+ case 53:
+ if (PPCRecompilerImlGen_STFSU(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
- case 54: // STFD
- if (PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext, opcode, false, true) == false)
+ case 54:
+ if (PPCRecompilerImlGen_STFD(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
- case 55: // STFDU
- if (PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext, opcode, true, true) == false)
+ case 55:
+ if (PPCRecompilerImlGen_STFDU(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
case 56:
- if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode, false) == false)
+ if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
case 57:
- if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode, true) == false)
+ if (PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
@@ -2690,12 +4357,12 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
}
break;
case 60:
- if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode, false) == false)
+ if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
case 61:
- if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode, true) == false)
+ if (PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
@@ -2804,482 +4471,556 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
return unsupportedInstructionFound;
}
-// returns false if code flow is not interrupted
-bool PPCRecompiler_CheckIfInstructionEndsSegment(PPCFunctionBoundaryTracker& boundaryTracker, uint32 instructionAddress, uint32 opcode, bool& makeNextInstEnterable, bool& continueDefaultPath, bool& hasBranchTarget, uint32& branchTarget)
+bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set& entryAddresses)
{
- hasBranchTarget = false;
- branchTarget = 0xFFFFFFFF;
- makeNextInstEnterable = false;
- continueDefaultPath = false;
- switch (Espresso::GetPrimaryOpcode(opcode))
- {
- case Espresso::PrimaryOpcode::VIRTUAL_HLE:
- {
- makeNextInstEnterable = true;
- hasBranchTarget = false;
- continueDefaultPath = false;
- return true;
- }
- case Espresso::PrimaryOpcode::BC:
- {
- uint32 BD, BI;
- Espresso::BOField BO;
- bool AA, LK;
- Espresso::decodeOp_BC(opcode, BD, BO, BI, AA, LK);
- if (!LK)
- {
- hasBranchTarget = true;
- branchTarget = (AA ? BD : BD) + instructionAddress;
- if (!boundaryTracker.ContainsAddress(branchTarget))
- hasBranchTarget = false; // far jump
- }
- makeNextInstEnterable = LK;
- continueDefaultPath = true;
- return true;
- }
- case Espresso::PrimaryOpcode::B:
- {
- uint32 LI;
- bool AA, LK;
- Espresso::decodeOp_B(opcode, LI, AA, LK);
- if (!LK)
- {
- hasBranchTarget = true;
- branchTarget = AA ? LI : LI + instructionAddress;
- if (!boundaryTracker.ContainsAddress(branchTarget))
- hasBranchTarget = false; // far jump
- }
- makeNextInstEnterable = LK;
- continueDefaultPath = false;
- return true;
- }
- case Espresso::PrimaryOpcode::GROUP_19:
- switch (Espresso::GetGroup19Opcode(opcode))
- {
- case Espresso::Opcode19::BCLR:
- case Espresso::Opcode19::BCCTR:
- {
- Espresso::BOField BO;
- uint32 BI;
- bool LK;
- Espresso::decodeOp_BCSPR(opcode, BO, BI, LK);
- continueDefaultPath = !BO.conditionIgnore() || !BO.decrementerIgnore(); // if branch is always taken then there is no continued path
- makeNextInstEnterable = Espresso::DecodeLK(opcode);
- return true;
- }
- default:
- break;
- }
- break;
- case Espresso::PrimaryOpcode::GROUP_31:
- switch (Espresso::GetGroup31Opcode(opcode))
- {
- default:
- break;
- }
- break;
- default:
- break;
- }
- return false;
-}
-
-void PPCRecompiler_DetermineBasicBlockRange(std::vector& basicBlockList, PPCFunctionBoundaryTracker& boundaryTracker, uint32 ppcStart, uint32 ppcEnd, const std::set& combinedBranchTargets, const std::set& entryAddresses)
-{
- cemu_assert_debug(ppcStart <= ppcEnd);
-
- uint32 currentAddr = ppcStart;
-
- PPCBasicBlockInfo* curBlockInfo = &basicBlockList.emplace_back(currentAddr, entryAddresses);
-
- uint32 basicBlockStart = currentAddr;
- while (currentAddr <= ppcEnd)
- {
- curBlockInfo->lastAddress = currentAddr;
- uint32 opcode = memory_readU32(currentAddr);
- bool nextInstIsEnterable = false;
- bool hasBranchTarget = false;
- bool hasContinuedFlow = false;
- uint32 branchTarget = 0;
- if (PPCRecompiler_CheckIfInstructionEndsSegment(boundaryTracker, currentAddr, opcode, nextInstIsEnterable, hasContinuedFlow, hasBranchTarget, branchTarget))
- {
- curBlockInfo->hasBranchTarget = hasBranchTarget;
- curBlockInfo->branchTarget = branchTarget;
- curBlockInfo->hasContinuedFlow = hasContinuedFlow;
- // start new basic block, except if this is the last instruction
- if (currentAddr >= ppcEnd)
- break;
- curBlockInfo = &basicBlockList.emplace_back(currentAddr + 4, entryAddresses);
- curBlockInfo->isEnterable = curBlockInfo->isEnterable || nextInstIsEnterable;
- currentAddr += 4;
- continue;
- }
- currentAddr += 4;
- if (currentAddr <= ppcEnd)
- {
- if (combinedBranchTargets.find(currentAddr) != combinedBranchTargets.end())
- {
- // instruction is branch target, start new basic block
- curBlockInfo = &basicBlockList.emplace_back(currentAddr, entryAddresses);
- }
- }
-
- }
-}
-
-std::vector PPCRecompiler_DetermineBasicBlockRange(PPCFunctionBoundaryTracker& boundaryTracker, const std::set& entryAddresses)
-{
- cemu_assert(!entryAddresses.empty());
- std::vector basicBlockList;
-
- const std::set