Mirror of https://github.com/cemu-project/Cemu.git (synced 2025-07-02 13:01:18 +12:00)
Compare commits
60 commits
SHA1 (author and date columns were not captured):
35ecfa3f54
6c392d5a22
9fb3c76b76
13ccf9a160
7db2b77983
0a121c97c7
e91740cf29
4f4c9594ac
5a4731f919
522b5ef260
057ef4598e
4f4412b334
00ff5549d9
c8ffff8f41
2f02fda9ea
da98aa4176
95dc590d2c
f3fe6f3455
2eec6b44c3
3eff2d4a60
d427b59019
a184a04e56
162fdabb9d
c8045f7f04
6df3e1742e
152b790242
02616bf6be
7168d20cde
783d88a892
28ea70b6d8
96765e4ac6
111637a9fd
bed5fdb195
996539fce8
05617a332b
caef34f2ff
f801fc1fe8
61484598fc
081ebead5f
d13dab0fd8
ba09daf328
557aff4024
de542410c2
33d5c6d490
352a918494
d083fc0470
fa7ae84314
00099c5ecc
e6a64aadda
a5f3558b79
b089ae5b32
06233e3462
4972381edc
cd6eb1097b
c4eab08f30
57ff99ce53
8b5cafa98e
186e92221a
31d2db6f78
ebb5ab53e2
199 changed files with 21950 additions and 16084 deletions
.github/workflows/build.yml (vendored): 9 changes

@@ -177,6 +177,9 @@ jobs:
   build-macos:
     runs-on: macos-14
+    strategy:
+      matrix:
+        arch: [x86_64, arm64]
     steps:
       - name: "Checkout repo"
         uses: actions/checkout@v4

@@ -202,7 +205,7 @@ jobs:
       - name: "Install molten-vk"
         run: |
-          curl -L -O https://github.com/KhronosGroup/MoltenVK/releases/download/v1.2.9/MoltenVK-macos.tar
+          curl -L -O https://github.com/KhronosGroup/MoltenVK/releases/download/v1.3.0/MoltenVK-macos.tar
           tar xf MoltenVK-macos.tar
           sudo mkdir -p /usr/local/lib
           sudo cp MoltenVK/MoltenVK/dynamic/dylib/macOS/libMoltenVK.dylib /usr/local/lib

@@ -236,7 +239,7 @@ jobs:
           cd build
           cmake .. ${{ env.BUILD_FLAGS }} \
             -DCMAKE_BUILD_TYPE=${{ env.BUILD_MODE }} \
-            -DCMAKE_OSX_ARCHITECTURES=x86_64 \
+            -DCMAKE_OSX_ARCHITECTURES=${{ matrix.arch }} \
             -DMACOS_BUNDLE=ON \
             -G Ninja

@@ -259,5 +262,5 @@ jobs:
       - name: Upload artifact
         uses: actions/upload-artifact@v4
         with:
-          name: cemu-bin-macos-x64
+          name: cemu-bin-macos-${{ matrix.arch }}
           path: ./bin/Cemu.dmg
.gitmodules (vendored): 3 changes

@@ -18,3 +18,6 @@
     path = dependencies/imgui
     url = https://github.com/ocornut/imgui
     shallow = true
+[submodule "dependencies/xbyak_aarch64"]
+    path = dependencies/xbyak_aarch64
+    url = https://github.com/fujitsu/xbyak_aarch64
@@ -166,7 +166,7 @@ if (UNIX AND NOT APPLE)
     if(ENABLE_BLUEZ)
         find_package(bluez REQUIRED)
-        set(ENABLE_WIIMOTE ON)
+        set(SUPPORTS_WIIMOTE ON)
         add_compile_definitions(HAS_BLUEZ)
     endif()

@@ -188,7 +188,7 @@ endif()
 if (ENABLE_HIDAPI)
     find_package(hidapi REQUIRED)
-    set(ENABLE_WIIMOTE ON)
+    set(SUPPORTS_WIIMOTE ON)
     add_compile_definitions(HAS_HIDAPI)
 endif ()

@@ -222,9 +222,18 @@ endif()
 add_subdirectory("dependencies/ih264d" EXCLUDE_FROM_ALL)

+if (CMAKE_OSX_ARCHITECTURES)
+    set(CEMU_ARCHITECTURE ${CMAKE_OSX_ARCHITECTURES})
+else()
+    set(CEMU_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR})
+endif()
+if(CEMU_ARCHITECTURE MATCHES "(aarch64)|(AARCH64)|(arm64)|(ARM64)")
+    add_subdirectory("dependencies/xbyak_aarch64" EXCLUDE_FROM_ALL)
+endif()
+
 find_package(ZArchive)
 if (NOT ZArchive_FOUND)
     add_subdirectory("dependencies/ZArchive" EXCLUDE_FROM_ALL)
 endif()

 add_subdirectory(src)
boost.natvis (new file): 26 lines

@@ -0,0 +1,26 @@
+<?xml version='1.0' encoding='utf-8'?>
+<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
+
+  <Type Name="boost::container::small_vector<*>">
+    <Expand>
+      <Item Name="[size]">m_holder.m_size</Item>
+      <ArrayItems>
+        <Size>m_holder.m_size</Size>
+        <ValuePointer>m_holder.m_start</ValuePointer>
+      </ArrayItems>
+    </Expand>
+  </Type>
+
+  <Type Name="boost::container::static_vector<*>">
+    <DisplayString>{{ size={m_holder.m_size} }}</DisplayString>
+    <Expand>
+      <Item Name="[size]" ExcludeView="simple">m_holder.m_size</Item>
+      <Item Name="[capacity]" ExcludeView="simple">static_capacity</Item>
+      <ArrayItems>
+        <Size>m_holder.m_size</Size>
+        <ValuePointer>($T1*)m_holder.storage.data</ValuePointer>
+      </ArrayItems>
+    </Expand>
+  </Type>
+
+</AutoVisualizer>
dependencies/ih264d/CMakeLists.txt (vendored): 3 changes

@@ -183,6 +183,9 @@ target_sources(ih264d PRIVATE
         "decoder/arm/ih264d_function_selector.c"
     )
     target_compile_options(ih264d PRIVATE -DARMV8)
+    if(APPLE)
+        target_sources(ih264d PRIVATE "common/armv8/macos_arm_symbol_aliases.s")
+    endif()
 else()
     message(FATAL_ERROR "ih264d unknown architecture: ${IH264D_ARCHITECTURE}")
 endif()
@@ -429,8 +429,13 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8:
     rev64     v7.4h, v2.4h
     ld1       {v3.2s}, [x10]
     sub       x5, x3, #8
+#ifdef __APPLE__
+    adrp      x12, _ih264_gai1_intrapred_chroma_plane_coeffs1@GOTPAGE
+    ldr       x12, [x12, _ih264_gai1_intrapred_chroma_plane_coeffs1@GOTPAGEOFF]
+#else
     adrp      x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs1
     ldr       x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs1]
+#endif
     usubl     v10.8h, v5.8b, v1.8b
     ld1       {v8.8b, v9.8b}, [x12] // Load multiplication factors 1 to 8 into D3
     mov       v8.d[1], v9.d[0]

@@ -484,10 +489,13 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8:
     zip1      v1.8h, v0.8h, v2.8h
     zip2      v2.8h, v0.8h, v2.8h
     mov       v0.16b, v1.16b

+#ifdef __APPLE__
+    adrp      x12, _ih264_gai1_intrapred_chroma_plane_coeffs2@GOTPAGE
+    ldr       x12, [x12, _ih264_gai1_intrapred_chroma_plane_coeffs2@GOTPAGEOFF]
+#else
     adrp      x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs2
     ldr       x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs2]
+#endif
     ld1       {v8.2s, v9.2s}, [x12]
     mov       v8.d[1], v9.d[0]
     mov       v10.16b, v8.16b
@@ -431,10 +431,13 @@ ih264_intra_pred_luma_16x16_mode_plane_av8:
     mov       x10, x1 //top_left
     mov       x4, #-1
     ld1       {v2.2s}, [x1], x8

+#ifdef __APPLE__
+    adrp      x7, _ih264_gai1_intrapred_luma_plane_coeffs@GOTPAGE
+    ldr       x7, [x7, _ih264_gai1_intrapred_luma_plane_coeffs@GOTPAGEOFF]
+#else
     adrp      x7, :got:ih264_gai1_intrapred_luma_plane_coeffs
     ldr       x7, [x7, #:got_lo12:ih264_gai1_intrapred_luma_plane_coeffs]
+#endif
     ld1       {v0.2s}, [x1]
     rev64     v2.8b, v2.8b
     ld1       {v6.2s, v7.2s}, [x7]
@@ -1029,9 +1029,13 @@ ih264_intra_pred_luma_8x8_mode_horz_u_av8:
     mov       v3.d[0], v2.d[1]
     ext       v4.16b, v2.16b, v2.16b, #1
     mov       v5.d[0], v4.d[1]

+#ifdef __APPLE__
+    adrp      x12, _ih264_gai1_intrapred_luma_8x8_horz_u@GOTPAGE
+    ldr       x12, [x12, _ih264_gai1_intrapred_luma_8x8_horz_u@GOTPAGEOFF]
+#else
     adrp      x12, :got:ih264_gai1_intrapred_luma_8x8_horz_u
     ldr       x12, [x12, #:got_lo12:ih264_gai1_intrapred_luma_8x8_horz_u]
+#endif
     uaddl     v20.8h, v0.8b, v2.8b
     uaddl     v22.8h, v1.8b, v3.8b
     uaddl     v24.8h, v2.8b, v4.8b
@@ -142,14 +142,22 @@ ih264_weighted_bi_pred_luma_av8:
     sxtw      x4, w4
     sxtw      x5, w5
     stp       x19, x20, [sp, #-16]!
+#ifndef __APPLE__
     ldr       w8, [sp, #80]        //Load wt2 in w8
     ldr       w9, [sp, #88]        //Load ofst1 in w9
-    add       w6, w6, #1           //w6 = log_WD + 1
-    neg       w10, w6              //w10 = -(log_WD + 1)
-    dup       v0.8h, w10           //Q0 = -(log_WD + 1) (32-bit)
     ldr       w10, [sp, #96]       //Load ofst2 in w10
     ldr       w11, [sp, #104]      //Load ht in w11
     ldr       w12, [sp, #112]      //Load wd in w12
+#else
+    ldr       w8, [sp, #80]        //Load wt2 in w8
+    ldr       w9, [sp, #84]        //Load ofst1 in w9
+    ldr       w10, [sp, #88]       //Load ofst2 in w10
+    ldr       w11, [sp, #92]       //Load ht in w11
+    ldr       w12, [sp, #96]       //Load wd in w12
+#endif
+    add       w6, w6, #1           //w6 = log_WD + 1
+    neg       w10, w6              //w10 = -(log_WD + 1)
+    dup       v0.8h, w10           //Q0 = -(log_WD + 1) (32-bit)
     add       w9, w9, #1           //w9 = ofst1 + 1
     add       w9, w9, w10          //w9 = ofst1 + ofst2 + 1
     mov       v2.s[0], w7

@@ -424,17 +432,24 @@ ih264_weighted_bi_pred_chroma_av8:
     sxtw      x5, w5
     stp       x19, x20, [sp, #-16]!

+#ifndef __APPLE__
     ldr       w8, [sp, #80]        //Load wt2 in w8
+    ldr       w9, [sp, #88]        //Load ofst1 in w9
+    ldr       w10, [sp, #96]       //Load ofst2 in w10
+    ldr       w11, [sp, #104]      //Load ht in w11
+    ldr       w12, [sp, #112]      //Load wd in w12
+#else
+    ldr       w8, [sp, #80]        //Load wt2 in w8
+    ldr       w9, [sp, #84]        //Load ofst1 in w9
+    ldr       w10, [sp, #88]       //Load ofst2 in w10
+    ldr       w11, [sp, #92]       //Load ht in w11
+    ldr       w12, [sp, #96]       //Load wd in w12
+#endif
     dup       v4.4s, w8            //Q2 = (wt2_u, wt2_v) (32-bit)
     dup       v2.4s, w7            //Q1 = (wt1_u, wt1_v) (32-bit)
     add       w6, w6, #1           //w6 = log_WD + 1
-    ldr       w9, [sp, #88]        //Load ofst1 in w9
-    ldr       w10, [sp, #96]       //Load ofst2 in w10
     neg       w20, w6              //w20 = -(log_WD + 1)
     dup       v0.8h, w20           //Q0 = -(log_WD + 1) (16-bit)
-    ldr       w11, [sp, #104]      //Load ht in x11
-    ldr       w12, [sp, #112]      //Load wd in x12
     dup       v20.8h, w9           //0ffset1
     dup       v21.8h, w10          //0ffset2
     srhadd    v6.8b, v20.8b, v21.8b
dependencies/ih264d/common/armv8/macos_arm_symbol_aliases.s (vendored, new file): 185 lines

@@ -0,0 +1,185 @@
+// macOS clang compilers append preceding underscores to function names, this is to prevent
+// mismatches with the assembly function names and the C functions as defined in the header.
+
+.global _ih264_deblk_chroma_horz_bs4_av8
+_ih264_deblk_chroma_horz_bs4_av8 = ih264_deblk_chroma_horz_bs4_av8
+
+.global _ih264_deblk_chroma_horz_bslt4_av8
+_ih264_deblk_chroma_horz_bslt4_av8 = ih264_deblk_chroma_horz_bslt4_av8
+
+.global _ih264_deblk_chroma_vert_bs4_av8
+_ih264_deblk_chroma_vert_bs4_av8 = ih264_deblk_chroma_vert_bs4_av8
+
+.global _ih264_deblk_chroma_vert_bslt4_av8
+_ih264_deblk_chroma_vert_bslt4_av8 = ih264_deblk_chroma_vert_bslt4_av8
+
+.global _ih264_deblk_luma_horz_bs4_av8
+_ih264_deblk_luma_horz_bs4_av8 = ih264_deblk_luma_horz_bs4_av8
+
+.global _ih264_deblk_luma_horz_bslt4_av8
+_ih264_deblk_luma_horz_bslt4_av8 = ih264_deblk_luma_horz_bslt4_av8
+
+.global _ih264_deblk_luma_vert_bs4_av8
+_ih264_deblk_luma_vert_bs4_av8 = ih264_deblk_luma_vert_bs4_av8
+
+.global _ih264_deblk_luma_vert_bslt4_av8
+_ih264_deblk_luma_vert_bslt4_av8 = ih264_deblk_luma_vert_bslt4_av8
+
+.global _ih264_default_weighted_pred_chroma_av8
+_ih264_default_weighted_pred_chroma_av8 = ih264_default_weighted_pred_chroma_av8
+
+.global _ih264_default_weighted_pred_luma_av8
+_ih264_default_weighted_pred_luma_av8 = ih264_default_weighted_pred_luma_av8
+
+.global _ih264_ihadamard_scaling_4x4_av8
+_ih264_ihadamard_scaling_4x4_av8 = ih264_ihadamard_scaling_4x4_av8
+
+.global _ih264_inter_pred_chroma_av8
+_ih264_inter_pred_chroma_av8 = ih264_inter_pred_chroma_av8
+
+.global _ih264_inter_pred_luma_copy_av8
+_ih264_inter_pred_luma_copy_av8 = ih264_inter_pred_luma_copy_av8
+
+.global _ih264_inter_pred_luma_horz_av8
+_ih264_inter_pred_luma_horz_av8 = ih264_inter_pred_luma_horz_av8
+
+.global _ih264_inter_pred_luma_horz_hpel_vert_hpel_av8
+_ih264_inter_pred_luma_horz_hpel_vert_hpel_av8 = ih264_inter_pred_luma_horz_hpel_vert_hpel_av8
+
+.global _ih264_inter_pred_luma_horz_hpel_vert_qpel_av8
+_ih264_inter_pred_luma_horz_hpel_vert_qpel_av8 = ih264_inter_pred_luma_horz_hpel_vert_qpel_av8
+
+.global _ih264_inter_pred_luma_horz_qpel_av8
+_ih264_inter_pred_luma_horz_qpel_av8 = ih264_inter_pred_luma_horz_qpel_av8
+
+.global _ih264_inter_pred_luma_horz_qpel_vert_hpel_av8
+_ih264_inter_pred_luma_horz_qpel_vert_hpel_av8 = ih264_inter_pred_luma_horz_qpel_vert_hpel_av8
+
+.global _ih264_inter_pred_luma_horz_qpel_vert_qpel_av8
+_ih264_inter_pred_luma_horz_qpel_vert_qpel_av8 = ih264_inter_pred_luma_horz_qpel_vert_qpel_av8
+
+.global _ih264_inter_pred_luma_vert_av8
+_ih264_inter_pred_luma_vert_av8 = ih264_inter_pred_luma_vert_av8
+
+.global _ih264_inter_pred_luma_vert_qpel_av8
+_ih264_inter_pred_luma_vert_qpel_av8 = ih264_inter_pred_luma_vert_qpel_av8
+
+.global _ih264_intra_pred_chroma_8x8_mode_horz_av8
+_ih264_intra_pred_chroma_8x8_mode_horz_av8 = ih264_intra_pred_chroma_8x8_mode_horz_av8
+
+.global _ih264_intra_pred_chroma_8x8_mode_plane_av8
+_ih264_intra_pred_chroma_8x8_mode_plane_av8 = ih264_intra_pred_chroma_8x8_mode_plane_av8
+
+.global _ih264_intra_pred_chroma_8x8_mode_vert_av8
+_ih264_intra_pred_chroma_8x8_mode_vert_av8 = ih264_intra_pred_chroma_8x8_mode_vert_av8
+
+.global _ih264_intra_pred_luma_16x16_mode_dc_av8
+_ih264_intra_pred_luma_16x16_mode_dc_av8 = ih264_intra_pred_luma_16x16_mode_dc_av8
+
+.global _ih264_intra_pred_luma_16x16_mode_horz_av8
+_ih264_intra_pred_luma_16x16_mode_horz_av8 = ih264_intra_pred_luma_16x16_mode_horz_av8
+
+.global _ih264_intra_pred_luma_16x16_mode_plane_av8
+_ih264_intra_pred_luma_16x16_mode_plane_av8 = ih264_intra_pred_luma_16x16_mode_plane_av8
+
+.global _ih264_intra_pred_luma_16x16_mode_vert_av8
+_ih264_intra_pred_luma_16x16_mode_vert_av8 = ih264_intra_pred_luma_16x16_mode_vert_av8
+
+.global _ih264_intra_pred_luma_4x4_mode_dc_av8
+_ih264_intra_pred_luma_4x4_mode_dc_av8 = ih264_intra_pred_luma_4x4_mode_dc_av8
+
+.global _ih264_intra_pred_luma_4x4_mode_diag_dl_av8
+_ih264_intra_pred_luma_4x4_mode_diag_dl_av8 = ih264_intra_pred_luma_4x4_mode_diag_dl_av8
+
+.global _ih264_intra_pred_luma_4x4_mode_diag_dr_av8
+_ih264_intra_pred_luma_4x4_mode_diag_dr_av8 = ih264_intra_pred_luma_4x4_mode_diag_dr_av8
+
+.global _ih264_intra_pred_luma_4x4_mode_horz_av8
+_ih264_intra_pred_luma_4x4_mode_horz_av8 = ih264_intra_pred_luma_4x4_mode_horz_av8
+
+.global _ih264_intra_pred_luma_4x4_mode_horz_d_av8
+_ih264_intra_pred_luma_4x4_mode_horz_d_av8 = ih264_intra_pred_luma_4x4_mode_horz_d_av8
+
+.global _ih264_intra_pred_luma_4x4_mode_horz_u_av8
+_ih264_intra_pred_luma_4x4_mode_horz_u_av8 = ih264_intra_pred_luma_4x4_mode_horz_u_av8
+
+.global _ih264_intra_pred_luma_4x4_mode_vert_av8
+_ih264_intra_pred_luma_4x4_mode_vert_av8 = ih264_intra_pred_luma_4x4_mode_vert_av8
+
+.global _ih264_intra_pred_luma_4x4_mode_vert_l_av8
+_ih264_intra_pred_luma_4x4_mode_vert_l_av8 = ih264_intra_pred_luma_4x4_mode_vert_l_av8
+
+.global _ih264_intra_pred_luma_4x4_mode_vert_r_av8
+_ih264_intra_pred_luma_4x4_mode_vert_r_av8 = ih264_intra_pred_luma_4x4_mode_vert_r_av8
+
+.global _ih264_intra_pred_luma_8x8_mode_dc_av8
+_ih264_intra_pred_luma_8x8_mode_dc_av8 = ih264_intra_pred_luma_8x8_mode_dc_av8
+
+.global _ih264_intra_pred_luma_8x8_mode_diag_dl_av8
+_ih264_intra_pred_luma_8x8_mode_diag_dl_av8 = ih264_intra_pred_luma_8x8_mode_diag_dl_av8
+
+.global _ih264_intra_pred_luma_8x8_mode_diag_dr_av8
+_ih264_intra_pred_luma_8x8_mode_diag_dr_av8 = ih264_intra_pred_luma_8x8_mode_diag_dr_av8
+
+.global _ih264_intra_pred_luma_8x8_mode_horz_av8
+_ih264_intra_pred_luma_8x8_mode_horz_av8 = ih264_intra_pred_luma_8x8_mode_horz_av8
+
+.global _ih264_intra_pred_luma_8x8_mode_horz_d_av8
+_ih264_intra_pred_luma_8x8_mode_horz_d_av8 = ih264_intra_pred_luma_8x8_mode_horz_d_av8
+
+.global _ih264_intra_pred_luma_8x8_mode_horz_u_av8
+_ih264_intra_pred_luma_8x8_mode_horz_u_av8 = ih264_intra_pred_luma_8x8_mode_horz_u_av8
+
+.global _ih264_intra_pred_luma_8x8_mode_vert_av8
+_ih264_intra_pred_luma_8x8_mode_vert_av8 = ih264_intra_pred_luma_8x8_mode_vert_av8
+
+.global _ih264_intra_pred_luma_8x8_mode_vert_l_av8
+_ih264_intra_pred_luma_8x8_mode_vert_l_av8 = ih264_intra_pred_luma_8x8_mode_vert_l_av8
+
+.global _ih264_intra_pred_luma_8x8_mode_vert_r_av8
+_ih264_intra_pred_luma_8x8_mode_vert_r_av8 = ih264_intra_pred_luma_8x8_mode_vert_r_av8
+
+.global _ih264_iquant_itrans_recon_4x4_av8
+_ih264_iquant_itrans_recon_4x4_av8 = ih264_iquant_itrans_recon_4x4_av8
+
+.global _ih264_iquant_itrans_recon_4x4_dc_av8
+_ih264_iquant_itrans_recon_4x4_dc_av8 = ih264_iquant_itrans_recon_4x4_dc_av8
+
+.global _ih264_iquant_itrans_recon_8x8_av8
+_ih264_iquant_itrans_recon_8x8_av8 = ih264_iquant_itrans_recon_8x8_av8
+
+.global _ih264_iquant_itrans_recon_8x8_dc_av8
+_ih264_iquant_itrans_recon_8x8_dc_av8 = ih264_iquant_itrans_recon_8x8_dc_av8
+
+.global _ih264_iquant_itrans_recon_chroma_4x4_av8
+_ih264_iquant_itrans_recon_chroma_4x4_av8 = ih264_iquant_itrans_recon_chroma_4x4_av8
+
+.global _ih264_iquant_itrans_recon_chroma_4x4_dc_av8
+_ih264_iquant_itrans_recon_chroma_4x4_dc_av8 = ih264_iquant_itrans_recon_chroma_4x4_dc_av8
+
+.global _ih264_pad_left_chroma_av8
+_ih264_pad_left_chroma_av8 = ih264_pad_left_chroma_av8
+
+.global _ih264_pad_left_luma_av8
+_ih264_pad_left_luma_av8 = ih264_pad_left_luma_av8
+
+.global _ih264_pad_right_chroma_av8
+_ih264_pad_right_chroma_av8 = ih264_pad_right_chroma_av8
+
+.global _ih264_pad_right_luma_av8
+_ih264_pad_right_luma_av8 = ih264_pad_right_luma_av8
+
+.global _ih264_pad_top_av8
+_ih264_pad_top_av8 = ih264_pad_top_av8
+
+.global _ih264_weighted_bi_pred_chroma_av8
+_ih264_weighted_bi_pred_chroma_av8 = ih264_weighted_bi_pred_chroma_av8
+
+.global _ih264_weighted_bi_pred_luma_av8
+_ih264_weighted_bi_pred_luma_av8 = ih264_weighted_bi_pred_luma_av8
+
+.global _ih264_weighted_pred_chroma_av8
+_ih264_weighted_pred_chroma_av8 = ih264_weighted_pred_chroma_av8
+
+.global _ih264_weighted_pred_luma_av8
+_ih264_weighted_pred_luma_av8 = ih264_weighted_pred_luma_av8
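Background on the alias file above: Mach-O targets prefix C-level symbols with a leading underscore, so code compiled from the C headers references _ih264_..._av8 while the hand-written AArch64 assembly defines the names without the underscore. Each .global/= pair exports an underscored alias for the existing assembly symbol. A minimal C++ illustration of the mismatch, using a deliberately simplified prototype (the real ih264 headers use their own typedefs):

// Hypothetical, simplified prototype; not the exact ih264 declaration.
extern "C" void ih264_pad_top_av8(unsigned char* src, int stride, int width, int padSize);

int main()
{
    unsigned char frame[64 * 8] = {};
    // On macOS this call is emitted as a reference to "_ih264_pad_top_av8";
    // without the alias file the link fails, because the .s source only
    // defines "ih264_pad_top_av8" (no underscore).
    ih264_pad_top_av8(frame + 64 * 4, 64, 64, 4);
    return 0;
}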
dependencies/vcpkg (vendored): 2 changes

@@ -1 +1 @@
-Subproject commit a4275b7eee79fb24ec2e135481ef5fce8b41c339
+Subproject commit 533a5fda5c0646d1771345fb572e759283444d5f
dependencies/xbyak_aarch64 (vendored, new submodule): 1 change

@@ -0,0 +1 @@
+Subproject commit 904b8923457f3ec0d6f82ea2d6832a792851194d
dist/linux/info.cemu.Cemu.desktop (vendored): 1 change

@@ -24,3 +24,4 @@ Comment[it]=Software per emulare giochi e applicazioni per Wii U su PC
 Categories=Game;Emulator;
 Keywords=Nintendo;
 MimeType=application/x-wii-u-rom;
+StartupWMClass=Cemu
@@ -49,7 +49,6 @@ add_subdirectory(audio)
 add_subdirectory(util)
 add_subdirectory(imgui)
 add_subdirectory(resource)
-add_subdirectory(asm)

 add_executable(CemuBin
 main.cpp

@@ -102,13 +101,21 @@ if (MACOS_BUNDLE)
     endforeach(folder)

     if(CMAKE_BUILD_TYPE STREQUAL "Debug")
-        set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/x64-osx/debug/lib/libusb-1.0.0.dylib")
+        set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/debug/lib/libusb-1.0.0.dylib")
     else()
-        set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/x64-osx/lib/libusb-1.0.0.dylib")
+        set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib/libusb-1.0.0.dylib")
     endif()

+    if (EXISTS "/usr/local/lib/libMoltenVK.dylib")
+        set(MOLTENVK_PATH "/usr/local/lib/libMoltenVK.dylib")
+    elseif (EXISTS "/opt/homebrew/lib/libMoltenVK.dylib")
+        set(MOLTENVK_PATH "/opt/homebrew/lib/libMoltenVK.dylib")
+    else()
+        message(FATAL_ERROR "failed to find libMoltenVK.dylib")
+    endif ()
+
     add_custom_command (TARGET CemuBin POST_BUILD
-        COMMAND ${CMAKE_COMMAND} ARGS -E copy "/usr/local/lib/libMoltenVK.dylib" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libMoltenVK.dylib"
+        COMMAND ${CMAKE_COMMAND} ARGS -E copy "${MOLTENVK_PATH}" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libMoltenVK.dylib"
         COMMAND ${CMAKE_COMMAND} ARGS -E copy "${LIBUSB_PATH}" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libusb-1.0.0.dylib"
         COMMAND ${CMAKE_COMMAND} ARGS -E copy "${CMAKE_SOURCE_DIR}/src/resource/update.sh" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/MacOS/update.sh"
         COMMAND bash -c "install_name_tool -add_rpath @executable_path/../Frameworks ${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/MacOS/${OUTPUT_NAME}"
@@ -67,24 +67,31 @@ add_library(CemuCafe
     HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h
     HW/Espresso/Recompiler/PPCRecompiler.cpp
     HW/Espresso/Recompiler/PPCRecompiler.h
-    HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp
+    HW/Espresso/Recompiler/IML/IML.h
+    HW/Espresso/Recompiler/IML/IMLSegment.cpp
+    HW/Espresso/Recompiler/IML/IMLSegment.h
+    HW/Espresso/Recompiler/IML/IMLInstruction.cpp
+    HW/Espresso/Recompiler/IML/IMLInstruction.h
+    HW/Espresso/Recompiler/IML/IMLDebug.cpp
+    HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp
+    HW/Espresso/Recompiler/IML/IMLOptimizer.cpp
+    HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp
+    HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h
+    HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp
+    HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h
     HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp
     HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp
     HW/Espresso/Recompiler/PPCRecompilerIml.h
-    HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp
-    HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp
-    HW/Espresso/Recompiler/PPCRecompilerImlRanges.h
-    HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator2.cpp
-    HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp
     HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp
-    HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp
-    HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp
-    HW/Espresso/Recompiler/PPCRecompilerX64.cpp
-    HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp
-    HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp
-    HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp
-    HW/Espresso/Recompiler/PPCRecompilerX64.h
-    HW/Espresso/Recompiler/x64Emit.hpp
+    HW/Espresso/Recompiler/BackendX64/BackendX64AVX.cpp
+    HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp
+    HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
+    HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp
+    HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp
+    HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp
+    HW/Espresso/Recompiler/BackendX64/BackendX64.h
+    HW/Espresso/Recompiler/BackendX64/X64Emit.hpp
+    HW/Espresso/Recompiler/BackendX64/x86Emitter.h
     HW/Latte/Common/RegisterSerializer.cpp
     HW/Latte/Common/RegisterSerializer.h
     HW/Latte/Common/ShaderSerializer.cpp

@@ -469,6 +476,10 @@ add_library(CemuCafe
     OS/libs/nsyshid/Infinity.h
     OS/libs/nsyshid/Skylander.cpp
     OS/libs/nsyshid/Skylander.h
+    OS/libs/nsyshid/SkylanderXbox360.cpp
+    OS/libs/nsyshid/SkylanderXbox360.h
+    OS/libs/nsyshid/g721/g721.cpp
+    OS/libs/nsyshid/g721/g721.h
     OS/libs/nsyskbd/nsyskbd.cpp
     OS/libs/nsyskbd/nsyskbd.h
     OS/libs/nsysnet/nsysnet.cpp

@@ -526,6 +537,14 @@ if(APPLE)
     target_sources(CemuCafe PRIVATE "HW/Latte/Renderer/Vulkan/CocoaSurface.mm")
 endif()

+if(CEMU_ARCHITECTURE MATCHES "(aarch64)|(AARCH64)|(arm64)|(ARM64)")
+    target_sources(CemuCafe PRIVATE
+        HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp
+        HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h
+    )
+    target_link_libraries(CemuCafe PRIVATE xbyak_aarch64)
+endif()
+
 set_property(TARGET CemuCafe PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")

 target_include_directories(CemuCafe PUBLIC "../")

@@ -533,11 +552,10 @@ target_include_directories(CemuCafe PUBLIC "../")
 if (glslang_VERSION VERSION_LESS "15.0.0")
     set(glslang_target "glslang::SPIRV")
 else()
-    set(glslang_target "glslang")
+    set(glslang_target "glslang::glslang")
 endif()

 target_link_libraries(CemuCafe PRIVATE
-    CemuAsm
     CemuAudio
     CemuCommon
     CemuComponents
@@ -844,7 +844,7 @@ namespace CafeSystem
         module->TitleStart();
     cemu_initForGame();
     // enter scheduler
-    if (ActiveSettings::GetCPUMode() == CPUMode::MulticoreRecompiler && !LaunchSettings::ForceInterpreter())
+    if ((ActiveSettings::GetCPUMode() == CPUMode::MulticoreRecompiler || LaunchSettings::ForceMultiCoreInterpreter()) && !LaunchSettings::ForceInterpreter())
         coreinit::OSSchedulerBegin(3);
     else
         coreinit::OSSchedulerBegin(1);
@@ -13,6 +13,8 @@
 #define SET_FST_ERROR(__code) if (errorCodeOut) *errorCodeOut = ErrorCode::__code

+static_assert(sizeof(NCrypto::AesIv) == 16); // make sure IV is actually 16 bytes
+
 class FSTDataSource
 {
 public:

@@ -868,7 +870,7 @@ static_assert(sizeof(FSTHashedBlock) == BLOCK_SIZE);
 struct FSTCachedRawBlock
 {
     FSTRawBlock blockData;
-    uint8 ivForNextBlock[16];
+    NCrypto::AesIv ivForNextBlock;
     uint64 lastAccess;
 };

@@ -919,13 +921,13 @@ void FSTVolume::TrimCacheIfRequired(FSTCachedRawBlock** droppedRawBlock, FSTCach
     }
 }

-void FSTVolume::DetermineUnhashedBlockIV(uint32 clusterIndex, uint32 blockIndex, uint8 ivOut[16])
+void FSTVolume::DetermineUnhashedBlockIV(uint32 clusterIndex, uint32 blockIndex, NCrypto::AesIv& ivOut)
 {
-    memset(ivOut, 0, sizeof(ivOut));
+    ivOut = {};
     if(blockIndex == 0)
     {
-        ivOut[0] = (uint8)(clusterIndex >> 8);
-        ivOut[1] = (uint8)(clusterIndex >> 0);
+        ivOut.iv[0] = (uint8)(clusterIndex >> 8);
+        ivOut.iv[1] = (uint8)(clusterIndex >> 0);
     }
     else
     {

@@ -936,20 +938,20 @@ void FSTVolume::DetermineUnhashedBlockIV(uint32 clusterIndex, uint32 blockIndex,
         auto itr = m_cacheDecryptedRawBlocks.find(cacheBlockId);
         if (itr != m_cacheDecryptedRawBlocks.end())
         {
-            memcpy(ivOut, itr->second->ivForNextBlock, 16);
+            ivOut = itr->second->ivForNextBlock;
         }
         else
         {
-            cemu_assert(m_sectorSize >= 16);
+            cemu_assert(m_sectorSize >= NCrypto::AesIv::SIZE);
             uint64 clusterOffset = (uint64)m_cluster[clusterIndex].offset * m_sectorSize;
-            uint8 prevIV[16];
-            if (m_dataSource->readData(clusterIndex, clusterOffset, blockIndex * m_sectorSize - 16, prevIV, 16) != 16)
+            NCrypto::AesIv prevIV{};
+            if (m_dataSource->readData(clusterIndex, clusterOffset, blockIndex * m_sectorSize - NCrypto::AesIv::SIZE, prevIV.iv, NCrypto::AesIv::SIZE) != NCrypto::AesIv::SIZE)
             {
                 cemuLog_log(LogType::Force, "Failed to read IV for raw FST block");
                 m_detectedCorruption = true;
                 return;
             }
-            memcpy(ivOut, prevIV, 16);
+            ivOut = prevIV;
         }
     }
 }

@@ -984,10 +986,10 @@ FSTCachedRawBlock* FSTVolume::GetDecryptedRawBlock(uint32 clusterIndex, uint32 b
         return nullptr;
     }
     // decrypt hash data
-    uint8 iv[16]{};
+    NCrypto::AesIv iv{};
     DetermineUnhashedBlockIV(clusterIndex, blockIndex, iv);
-    memcpy(block->ivForNextBlock, block->blockData.rawData.data() + m_sectorSize - 16, 16);
-    AES128_CBC_decrypt(block->blockData.rawData.data(), block->blockData.rawData.data(), m_sectorSize, m_partitionTitlekey.b, iv);
+    std::copy(block->blockData.rawData.data() + m_sectorSize - NCrypto::AesIv::SIZE, block->blockData.rawData.data() + m_sectorSize, block->ivForNextBlock.iv);
+    AES128_CBC_decrypt(block->blockData.rawData.data(), block->blockData.rawData.data(), m_sectorSize, m_partitionTitlekey.b, iv.iv);
     // if this is the next block, then hash it
     if(cluster.hasContentHash)
     {
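The pattern in this commit, replacing raw uint8[16] buffers with a small typed IV wrapper, can be sketched as below. This is an illustration of the idea, not Cemu's actual NCrypto::AesIv definition; only the member names (iv, SIZE) are taken from the diff.

#include <array>
#include <cstddef>
#include <cstdint>

// Hypothetical sketch of a typed AES-IV wrapper in the spirit of NCrypto::AesIv.
// A struct like this copies with '=', and makes the 16-byte size a checked
// property instead of a convention repeated at every call site.
struct AesIv
{
    static constexpr std::size_t SIZE = 16;
    std::array<uint8_t, SIZE> iv{};
};
static_assert(sizeof(AesIv) == 16);

void example(AesIv& out, const AesIv& prev)
{
    out = {};   // zero-initialize, replaces memset(ivOut, 0, 16)
    out = prev; // whole-value copy, replaces memcpy(ivOut, prevIV, 16)
}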
@@ -83,7 +83,6 @@ public:
     }
 };

 private:
-
     /* FST data (in memory) */
     enum class ClusterHashMode : uint8
     {

@@ -193,7 +192,7 @@ private:
     std::unordered_map<uint64, struct FSTCachedHashedBlock*> m_cacheDecryptedHashedBlocks;
     uint64 m_cacheAccessCounter{};

-    void DetermineUnhashedBlockIV(uint32 clusterIndex, uint32 blockIndex, uint8 ivOut[16]);
+    void DetermineUnhashedBlockIV(uint32 clusterIndex, uint32 blockIndex, NCrypto::AesIv& ivOut);

     struct FSTCachedRawBlock* GetDecryptedRawBlock(uint32 clusterIndex, uint32 blockIndex);
     struct FSTCachedHashedBlock* GetDecryptedHashedBlock(uint32 clusterIndex, uint32 blockIndex);
@@ -821,7 +821,7 @@ void GraphicPack2::AddConstantsForCurrentPreset(ExpressionParser& ep)
     }
 }

-void GraphicPack2::_iterateReplacedFiles(const fs::path& currentPath, bool isAOC)
+void GraphicPack2::_iterateReplacedFiles(const fs::path& currentPath, bool isAOC, const char* virtualMountBase)
 {
     uint64 currentTitleId = CafeSystem::GetForegroundTitleId();
     uint64 aocTitleId = (currentTitleId & 0xFFFFFFFFull) | 0x0005000c00000000ull;

@@ -836,7 +836,7 @@ void GraphicPack2::_iterateReplacedFiles(const fs::path& currentPath, bool isAOC
         }
         else
         {
-            virtualMountPath = fs::path("vol/content/") / virtualMountPath;
+            virtualMountPath = fs::path(virtualMountBase) / virtualMountPath;
         }
         fscDeviceRedirect_add(virtualMountPath.generic_string(), it.file_size(), it.path().generic_string(), m_fs_priority);
     }

@@ -861,7 +861,7 @@ void GraphicPack2::LoadReplacedFiles()
     {
         // setup redirections
         fscDeviceRedirect_map();
-        _iterateReplacedFiles(contentPath, false);
+        _iterateReplacedFiles(contentPath, false, "vol/content/");
     }
     // /aoc/
     fs::path aocPath(gfxPackPath);

@@ -874,7 +874,18 @@ void GraphicPack2::LoadReplacedFiles()
         aocTitleId |= 0x0005000c00000000ULL;
         // setup redirections
         fscDeviceRedirect_map();
-        _iterateReplacedFiles(aocPath, true);
+        _iterateReplacedFiles(aocPath, true, nullptr);
     }
+
+    // /code/
+    fs::path codePath(gfxPackPath);
+    codePath.append("code");
+
+    if (fs::exists(codePath, ec))
+    {
+        // setup redirections
+        fscDeviceRedirect_map();
+        _iterateReplacedFiles(codePath, false, CafeSystem::GetInternalVirtualCodeFolder().c_str());
+    }
 }
@@ -260,7 +260,7 @@ private:
     CustomShader LoadShader(const fs::path& path, uint64 shader_base_hash, uint64 shader_aux_hash, GP_SHADER_TYPE shader_type) const;
     void ApplyShaderPresets(std::string& shader_source) const;
     void LoadReplacedFiles();
-    void _iterateReplacedFiles(const fs::path& currentPath, bool isAOC);
+    void _iterateReplacedFiles(const fs::path& currentPath, bool isAOC, const char* virtualMountBase);

     // ram mappings
     std::vector<std::pair<MPTR, MPTR>> m_ramMappings;
@@ -8,6 +8,7 @@
 #include "gui/debugger/DebuggerWindow2.h"

 #include "Cafe/OS/libs/coreinit/coreinit.h"
+#include "util/helpers/helpers.h"

 #if BOOST_OS_WINDOWS
 #include <Windows.h>

@@ -136,11 +137,6 @@ void debugger_createCodeBreakpoint(uint32 address, uint8 bpType)
     debugger_updateExecutionBreakpoint(address);
 }

-void debugger_createExecuteBreakpoint(uint32 address)
-{
-    debugger_createCodeBreakpoint(address, DEBUGGER_BP_T_NORMAL);
-}
-
 namespace coreinit
 {
     std::vector<std::thread::native_handle_type>& OSGetSchedulerThreads();

@@ -294,8 +290,23 @@ void debugger_toggleExecuteBreakpoint(uint32 address)
     }
     else
     {
-        // create new breakpoint
-        debugger_createExecuteBreakpoint(address);
+        // create new execution breakpoint
+        debugger_createCodeBreakpoint(address, DEBUGGER_BP_T_NORMAL);
     }
 }

+void debugger_toggleLoggingBreakpoint(uint32 address)
+{
+    auto existingBP = debugger_getFirstBP(address, DEBUGGER_BP_T_LOGGING);
+    if (existingBP)
+    {
+        // delete existing breakpoint
+        debugger_deleteBreakpoint(existingBP);
+    }
+    else
+    {
+        // create new logging breakpoint
+        debugger_createCodeBreakpoint(address, DEBUGGER_BP_T_LOGGING);
+    }
+}
+
@@ -538,7 +549,48 @@ void debugger_enterTW(PPCInterpreter_t* hCPU)
 {
     if (bp->bpType == DEBUGGER_BP_T_LOGGING && bp->enabled)
     {
-        std::string logName = !bp->comment.empty() ? "Breakpoint '"+boost::nowide::narrow(bp->comment)+"'" : fmt::format("Breakpoint at 0x{:08X} (no comment)", bp->address);
+        std::string comment = !bp->comment.empty() ? boost::nowide::narrow(bp->comment) : fmt::format("Breakpoint at 0x{:08X} (no comment)", bp->address);
+
+        auto replacePlaceholders = [&](const std::string& prefix, const auto& formatFunc)
+        {
+            size_t pos = 0;
+            while ((pos = comment.find(prefix, pos)) != std::string::npos)
+            {
+                size_t endPos = comment.find('}', pos);
+                if (endPos == std::string::npos)
+                    break;
+
+                try
+                {
+                    if (int regNum = ConvertString<int>(comment.substr(pos + prefix.length(), endPos - pos - prefix.length())); regNum >= 0 && regNum < 32)
+                    {
+                        std::string replacement = formatFunc(regNum);
+                        comment.replace(pos, endPos - pos + 1, replacement);
+                        pos += replacement.length();
+                    }
+                    else
+                    {
+                        pos = endPos + 1;
+                    }
+                }
+                catch (...)
+                {
+                    pos = endPos + 1;
+                }
+            }
+        };
+
+        // Replace integer register placeholders {rX}
+        replacePlaceholders("{r", [&](int regNum) {
+            return fmt::format("0x{:08X}", hCPU->gpr[regNum]);
+        });
+
+        // Replace floating point register placeholders {fX}
+        replacePlaceholders("{f", [&](int regNum) {
+            return fmt::format("{}", hCPU->fpr[regNum].fpr);
+        });
+
+        std::string logName = "Breakpoint '" + comment + "'";
         std::string logContext = fmt::format("Thread: {:08x} LR: 0x{:08x}", MEMPTR<OSThread_t>(coreinit::OSGetCurrentThread()).GetMPTR(), hCPU->spr.LR, cemuLog_advancedPPCLoggingEnabled() ? " Stack Trace:" : "");
         cemuLog_log(LogType::Force, "[Debugger] {} was executed! {}", logName, logContext);
         if (cemuLog_advancedPPCLoggingEnabled())
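The new logging-breakpoint path above substitutes live register values into the breakpoint comment before logging. A hypothetical usage example (the comment text is invented for illustration, not taken from the diff):

// Hypothetical breakpoint comment, as typed in the debugger UI:
//     "alloc request, size={r3}, heap={r4}"
// When the logging breakpoint fires with GPR3 = 0x20 and GPR4 = 0x10000000,
// the logged line becomes roughly:
//     [Debugger] Breakpoint 'alloc request, size=0x00000020, heap=0x10000000' was executed! ...
// {fN} placeholders expand the same way using the floating point registers.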
@@ -100,8 +100,8 @@ extern debuggerState_t debuggerState;
 // new API
 DebuggerBreakpoint* debugger_getFirstBP(uint32 address);
 void debugger_createCodeBreakpoint(uint32 address, uint8 bpType);
-void debugger_createExecuteBreakpoint(uint32 address);
 void debugger_toggleExecuteBreakpoint(uint32 address); // create/remove execute breakpoint
+void debugger_toggleLoggingBreakpoint(uint32 address); // create/remove logging breakpoint
 void debugger_toggleBreakpoint(uint32 address, bool state, DebuggerBreakpoint* bp);

 void debugger_createMemoryBreakpoint(uint32 address, bool onRead, bool onWrite);
@@ -10,6 +10,18 @@ namespace Espresso
     CR_BIT_INDEX_SO = 3,
 };

+enum class PSQ_LOAD_TYPE
+{
+    TYPE_F32 = 0,
+    TYPE_UNUSED1 = 1,
+    TYPE_UNUSED2 = 2,
+    TYPE_UNUSED3 = 3,
+    TYPE_U8 = 4,
+    TYPE_U16 = 5,
+    TYPE_S8 = 6,
+    TYPE_S16 = 7,
+};
+
 enum class PrimaryOpcode
 {
     // underscore at the end of the name means that this instruction always updates CR0 (as if RC bit is set)

@@ -91,13 +103,15 @@ namespace Espresso
     BCCTR = 528
 };

-enum class OPCODE_31
+enum class Opcode31
 {
-
     TW = 4,
     MFTB = 371,
 };

 inline PrimaryOpcode GetPrimaryOpcode(uint32 opcode) { return (PrimaryOpcode)(opcode >> 26); };
 inline Opcode19 GetGroup19Opcode(uint32 opcode) { return (Opcode19)((opcode >> 1) & 0x3FF); };
+inline Opcode31 GetGroup31Opcode(uint32 opcode) { return (Opcode31)((opcode >> 1) & 0x3FF); };

 struct BOField
 {

@@ -132,6 +146,12 @@ namespace Espresso
     uint8 bo;
 };

+// returns true if LK bit is set, only valid for branch instructions
+inline bool DecodeLK(uint32 opcode)
+{
+    return (opcode & 1) != 0;
+}
+
 inline void _decodeForm_I(uint32 opcode, uint32& LI, bool& AA, bool& LK)
 {
     LI = opcode & 0x3fffffc;

@@ -183,13 +203,7 @@ namespace Espresso
     _decodeForm_D_branch(opcode, BD, BO, BI, AA, LK);
 }

-inline void decodeOp_BCLR(uint32 opcode, BOField& BO, uint32& BI, bool& LK)
-{
-    // form XL (with BD field expected to be zero)
-    _decodeForm_XL(opcode, BO, BI, LK);
-}
-
-inline void decodeOp_BCCTR(uint32 opcode, BOField& BO, uint32& BI, bool& LK)
+inline void decodeOp_BCSPR(uint32 opcode, BOField& BO, uint32& BI, bool& LK) // BCLR and BCSPR
 {
     // form XL (with BD field expected to be zero)
     _decodeForm_XL(opcode, BO, BI, LK);
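For context, the decoder helpers above carve fixed bit fields out of a 32-bit PowerPC instruction word. A minimal standalone sketch of the extraction (my illustration; only the shift/mask expressions are taken from the helpers in the diff). 0x7FE00008 encodes "tw 31,0,0", the unconditional trap: primary opcode 31 in the top 6 bits, extended opcode 4 in bits 21..30.

#include <cstdint>
#include <cstdio>

int main()
{
    uint32_t opcode = 0x7FE00008;              // tw 31,0,0
    uint32_t primary  = opcode >> 26;          // -> 31, as in GetPrimaryOpcode
    uint32_t extended = (opcode >> 1) & 0x3FF; // -> 4 (TW), as in GetGroup31Opcode
    bool lk = (opcode & 1) != 0;               // LK bit, as in DecodeLK -> false
    std::printf("primary=%u extended=%u LK=%d\n", (unsigned)primary, (unsigned)extended, lk ? 1 : 0);
    return 0;
}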
@@ -3,12 +3,12 @@ static void PPCInterpreter_setXerOV(PPCInterpreter_t* hCPU, bool hasOverflow)
 {
     if (hasOverflow)
     {
-        hCPU->spr.XER |= XER_SO;
-        hCPU->spr.XER |= XER_OV;
+        hCPU->xer_so = 1;
+        hCPU->xer_ov = 1;
     }
     else
     {
-        hCPU->spr.XER &= ~XER_OV;
+        hCPU->xer_ov = 0;
     }
 }

@@ -41,7 +41,7 @@ static void PPCInterpreter_ADD(PPCInterpreter_t* hCPU, uint32 opcode)

 static void PPCInterpreter_ADDO(PPCInterpreter_t* hCPU, uint32 opcode)
 {
-    // untested (Don't Starve Giant Edition uses this instruction + BSO)
+    // Don't Starve Giant Edition uses this instruction + BSO
     PPC_OPC_TEMPL3_XO();
     uint32 result = hCPU->gpr[rA] + hCPU->gpr[rB];
     PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(hCPU->gpr[rA], hCPU->gpr[rB], result));

@@ -113,7 +113,6 @@ static void PPCInterpreter_ADDEO(PPCInterpreter_t* hCPU, uint32 opcode)
     else
         hCPU->xer_ca = 0;
     PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(a, b, hCPU->gpr[rD]));
-    // update CR
     if (opHasRC())
         ppc_update_cr0(hCPU, hCPU->gpr[rD]);
     PPCInterpreter_nextInstruction(hCPU);

@@ -130,7 +129,7 @@ static void PPCInterpreter_ADDI(PPCInterpreter_t* hCPU, uint32 opcode)

 static void PPCInterpreter_ADDIC(PPCInterpreter_t* hCPU, uint32 opcode)
 {
-    int rD, rA;
+    sint32 rD, rA;
     uint32 imm;
     PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
     uint32 a = hCPU->gpr[rA];

@@ -145,7 +144,7 @@ static void PPCInterpreter_ADDIC(PPCInterpreter_t* hCPU, uint32 opcode)

 static void PPCInterpreter_ADDIC_(PPCInterpreter_t* hCPU, uint32 opcode)
 {
-    int rD, rA;
+    sint32 rD, rA;
     uint32 imm;
     PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
     uint32 a = hCPU->gpr[rA];

@@ -155,14 +154,13 @@ static void PPCInterpreter_ADDIC_(PPCInterpreter_t* hCPU, uint32 opcode)
         hCPU->xer_ca = 1;
     else
         hCPU->xer_ca = 0;
-    // update cr0 flags
     ppc_update_cr0(hCPU, hCPU->gpr[rD]);
     PPCInterpreter_nextInstruction(hCPU);
 }

 static void PPCInterpreter_ADDIS(PPCInterpreter_t* hCPU, uint32 opcode)
 {
-    int rD, rA;
+    sint32 rD, rA;
     uint32 imm;
     PPC_OPC_TEMPL_D_Shift16(opcode, rD, rA, imm);
     hCPU->gpr[rD] = (rA ? hCPU->gpr[rA] : 0) + imm;

@@ -185,6 +183,23 @@ static void PPCInterpreter_ADDZE(PPCInterpreter_t* hCPU, uint32 opcode)
     PPCInterpreter_nextInstruction(hCPU);
 }

+static void PPCInterpreter_ADDZEO(PPCInterpreter_t* hCPU, uint32 opcode)
+{
+    PPC_OPC_TEMPL3_XO();
+    PPC_ASSERT(rB == 0);
+    uint32 a = hCPU->gpr[rA];
+    uint32 ca = hCPU->xer_ca;
+    hCPU->gpr[rD] = a + ca;
+    PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(a, 0, hCPU->gpr[rD]));
+    if ((a == 0xffffffff) && ca)
+        hCPU->xer_ca = 1;
+    else
+        hCPU->xer_ca = 0;
+    if (opHasRC())
+        ppc_update_cr0(hCPU, hCPU->gpr[rD]);
+    PPCInterpreter_nextInstruction(hCPU);
+}
+
 static void PPCInterpreter_ADDME(PPCInterpreter_t* hCPU, uint32 opcode)
 {
     PPC_OPC_TEMPL3_XO();

@@ -201,6 +216,23 @@ static void PPCInterpreter_ADDME(PPCInterpreter_t* hCPU, uint32 opcode)
     PPCInterpreter_nextInstruction(hCPU);
 }

+static void PPCInterpreter_ADDMEO(PPCInterpreter_t* hCPU, uint32 opcode)
+{
+    PPC_OPC_TEMPL3_XO();
+    PPC_ASSERT(rB == 0);
+    uint32 a = hCPU->gpr[rA];
+    uint32 ca = hCPU->xer_ca;
+    hCPU->gpr[rD] = a + ca + 0xffffffff;
+    PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(a, 0xffffffff, hCPU->gpr[rD]));
+    if (a || ca)
+        hCPU->xer_ca = 1;
+    else
+        hCPU->xer_ca = 0;
+    if (opHasRC())
+        ppc_update_cr0(hCPU, hCPU->gpr[rD]);
+    PPCInterpreter_nextInstruction(hCPU);
+}
+
 static void PPCInterpreter_SUBF(PPCInterpreter_t* hCPU, uint32 opcode)
 {
     PPC_OPC_TEMPL3_XO();

@@ -246,7 +278,7 @@ static void PPCInterpreter_SUBFCO(PPCInterpreter_t* hCPU, uint32 opcode)
     uint32 a = hCPU->gpr[rA];
     uint32 b = hCPU->gpr[rB];
     hCPU->gpr[rD] = ~a + b + 1;
-    // update xer
+    // update carry
     if (ppc_carry_3(~a, b, 1))
         hCPU->xer_ca = 1;
     else

@@ -260,7 +292,7 @@ static void PPCInterpreter_SUBFCO(PPCInterpreter_t* hCPU, uint32 opcode)

 static void PPCInterpreter_SUBFIC(PPCInterpreter_t* hCPU, uint32 opcode)
 {
-    int rD, rA;
+    sint32 rD, rA;
     uint32 imm;
     PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
     uint32 a = hCPU->gpr[rA];

@@ -284,7 +316,6 @@ static void PPCInterpreter_SUBFE(PPCInterpreter_t* hCPU, uint32 opcode)
     hCPU->xer_ca = 1;
     else
         hCPU->xer_ca = 0;
-    // update cr0
     if (opHasRC())
         ppc_update_cr0(hCPU, hCPU->gpr[rD]);
     PPCInterpreter_nextInstruction(hCPU);

@@ -304,7 +335,6 @@ static void PPCInterpreter_SUBFEO(PPCInterpreter_t* hCPU, uint32 opcode)
     else
         hCPU->xer_ca = 0;
     PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~a, b, result));
-    // update cr0
     if (opHasRC())
         ppc_update_cr0(hCPU, hCPU->gpr[rD]);
     PPCInterpreter_nextInstruction(hCPU);

@@ -326,9 +356,25 @@ static void PPCInterpreter_SUBFZE(PPCInterpreter_t* hCPU, uint32 opcode)
     PPCInterpreter_nextInstruction(hCPU);
 }

+static void PPCInterpreter_SUBFZEO(PPCInterpreter_t* hCPU, uint32 opcode)
+{
+    PPC_OPC_TEMPL3_XO();
+    PPC_ASSERT(rB == 0);
+    uint32 a = hCPU->gpr[rA];
+    uint32 ca = hCPU->xer_ca;
+    hCPU->gpr[rD] = ~a + ca;
+    PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~a, 0, hCPU->gpr[rD]));
+    if (a == 0 && ca)
+        hCPU->xer_ca = 1;
+    else
+        hCPU->xer_ca = 0;
+    if (opHasRC())
+        ppc_update_cr0(hCPU, hCPU->gpr[rD]);
+    PPCInterpreter_nextInstruction(hCPU);
+}
+
 static void PPCInterpreter_SUBFME(PPCInterpreter_t* hCPU, uint32 opcode)
 {
     // untested
     PPC_OPC_TEMPL3_XO();
     PPC_ASSERT(rB == 0);
     uint32 a = hCPU->gpr[rA];

@@ -339,7 +385,24 @@ static void PPCInterpreter_SUBFME(PPCInterpreter_t* hCPU, uint32 opcode)
     hCPU->xer_ca = 1;
     else
         hCPU->xer_ca = 0;
-    // update cr0
     if (opcode & PPC_OPC_RC)
         ppc_update_cr0(hCPU, hCPU->gpr[rD]);
     PPCInterpreter_nextInstruction(hCPU);
 }

+static void PPCInterpreter_SUBFMEO(PPCInterpreter_t* hCPU, uint32 opcode)
+{
+    PPC_OPC_TEMPL3_XO();
+    PPC_ASSERT(rB == 0);
+    uint32 a = hCPU->gpr[rA];
+    uint32 ca = hCPU->xer_ca;
+    hCPU->gpr[rD] = ~a + 0xFFFFFFFF + ca;
+    PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~a, 0xFFFFFFFF, hCPU->gpr[rD]));
+    // update xer carry
+    if (ppc_carry_3(~a, 0xFFFFFFFF, ca))
+        hCPU->xer_ca = 1;
+    else
+        hCPU->xer_ca = 0;
+    if (opcode & PPC_OPC_RC)
+        ppc_update_cr0(hCPU, hCPU->gpr[rD]);
+    PPCInterpreter_nextInstruction(hCPU);
+}
+
@@ -352,13 +415,8 @@ static void PPCInterpreter_MULHW_(PPCInterpreter_t* hCPU, uint32 opcode)
     sint64 b = (sint32)hCPU->gpr[rB];
     sint64 c = a * b;
     hCPU->gpr[rD] = ((uint64)c) >> 32;
-    if (opcode & PPC_OPC_RC) {
-        // update cr0 flags
-#ifdef CEMU_DEBUG_ASSERT
-        assert_dbg();
-#endif
+    if (opHasRC())
         ppc_update_cr0(hCPU, hCPU->gpr[rD]);
-    }
     PPCInterpreter_nextInstruction(hCPU);
 }

@@ -409,14 +467,14 @@ static void PPCInterpreter_MULLI(PPCInterpreter_t* hCPU, uint32 opcode)
 static void PPCInterpreter_DIVW(PPCInterpreter_t* hCPU, uint32 opcode)
 {
     PPC_OPC_TEMPL3_XO();
-    sint32 a = hCPU->gpr[rA];
-    sint32 b = hCPU->gpr[rB];
+    sint32 a = (sint32)hCPU->gpr[rA];
+    sint32 b = (sint32)hCPU->gpr[rB];
     if (b == 0)
-    {
-        cemuLog_logDebug(LogType::Force, "Error: Division by zero! [{:08x}]", (uint32)hCPU->instructionPointer);
-        b++;
-    }
-    hCPU->gpr[rD] = a / b;
+        hCPU->gpr[rD] = a < 0 ? 0xFFFFFFFF : 0;
+    else if (a == 0x80000000 && b == 0xFFFFFFFF)
+        hCPU->gpr[rD] = 0xFFFFFFFF;
+    else
+        hCPU->gpr[rD] = a / b;
     if (opHasRC())
         ppc_update_cr0(hCPU, hCPU->gpr[rD]);
     PPCInterpreter_nextInstruction(hCPU);

@@ -425,16 +483,23 @@ static void PPCInterpreter_DIVW(PPCInterpreter_t* hCPU, uint32 opcode)
 static void PPCInterpreter_DIVWO(PPCInterpreter_t* hCPU, uint32 opcode)
 {
     PPC_OPC_TEMPL3_XO();
-    sint32 a = hCPU->gpr[rA];
-    sint32 b = hCPU->gpr[rB];
+    sint32 a = (sint32)hCPU->gpr[rA];
+    sint32 b = (sint32)hCPU->gpr[rB];
     if (b == 0)
     {
         PPCInterpreter_setXerOV(hCPU, true);
-        PPCInterpreter_nextInstruction(hCPU);
-        return;
+        hCPU->gpr[rD] = a < 0 ? 0xFFFFFFFF : 0;
     }
+    else if(a == 0x80000000 && b == 0xFFFFFFFF)
+    {
+        PPCInterpreter_setXerOV(hCPU, true);
+        hCPU->gpr[rD] = 0xFFFFFFFF;
+    }
+    else
+    {
+        hCPU->gpr[rD] = a / b;
+        PPCInterpreter_setXerOV(hCPU, false);
+    }
-    hCPU->gpr[rD] = a / b;
-    PPCInterpreter_setXerOV(hCPU, false);
     if (opHasRC())
         ppc_update_cr0(hCPU, hCPU->gpr[rD]);
     PPCInterpreter_nextInstruction(hCPU);

@@ -443,12 +508,14 @@ static void PPCInterpreter_DIVWO(PPCInterpreter_t* hCPU, uint32 opcode)
 static void PPCInterpreter_DIVWU(PPCInterpreter_t* hCPU, uint32 opcode)
 {
     PPC_OPC_TEMPL3_XO();
-    if (hCPU->gpr[rB] == 0)
-    {
-        PPCInterpreter_nextInstruction(hCPU);
-        return;
-    }
-    hCPU->gpr[rD] = hCPU->gpr[rA] / hCPU->gpr[rB];
+    uint32 a = hCPU->gpr[rA];
+    uint32 b = hCPU->gpr[rB];
+    if (b == 0)
+        hCPU->gpr[rD] = 0;
+    else if (a == 0x80000000 && b == 0xFFFFFFFF)
+        hCPU->gpr[rD] = 0;
+    else
+        hCPU->gpr[rD] = a / b;
     if (opHasRC())
         ppc_update_cr0(hCPU, hCPU->gpr[rD]);
     PPCInterpreter_nextInstruction(hCPU);

@@ -457,14 +524,23 @@ static void PPCInterpreter_DIVWU(PPCInterpreter_t* hCPU, uint32 opcode)
 static void PPCInterpreter_DIVWUO(PPCInterpreter_t* hCPU, uint32 opcode)
 {
     PPC_OPC_TEMPL3_XO();
-    if (hCPU->gpr[rB] == 0)
+    uint32 a = hCPU->gpr[rA];
+    uint32 b = hCPU->gpr[rB];
+    if (b == 0)
     {
         PPCInterpreter_setXerOV(hCPU, true);
-        PPCInterpreter_nextInstruction(hCPU);
-        return;
+        hCPU->gpr[rD] = 0;
     }
+    else if(a == 0x80000000 && b == 0xFFFFFFFF)
+    {
+        PPCInterpreter_setXerOV(hCPU, false);
+        hCPU->gpr[rD] = 0;
+    }
+    else
+    {
+        hCPU->gpr[rD] = a / b;
+        PPCInterpreter_setXerOV(hCPU, false);
+    }
-    hCPU->gpr[rD] = hCPU->gpr[rA] / hCPU->gpr[rB];
-    PPCInterpreter_setXerOV(hCPU, false);
     if (opHasRC())
         ppc_update_cr0(hCPU, hCPU->gpr[rD]);
     PPCInterpreter_nextInstruction(hCPU);
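The division rewrites above encode the rule that PowerPC divw/divwu never trap: on divide-by-zero, and on the signed overflow case 0x80000000 / -1, the handlers now produce an explicit result instead of skipping the register write. A standalone sketch of the signed case, summarizing the logic visible in the diff (not code copied from Cemu):

#include <cstdint>

// Sketch of the divw edge-case handling introduced above: PPC integer
// division does not trap, so the emulator must yield a value for every input.
uint32_t divw_result(uint32_t ra, uint32_t rb)
{
    int32_t a = (int32_t)ra;
    int32_t b = (int32_t)rb;
    if (b == 0)
        return a < 0 ? 0xFFFFFFFF : 0;       // divide by zero
    if (ra == 0x80000000 && rb == 0xFFFFFFFF)
        return 0xFFFFFFFF;                   // INT_MIN / -1 would overflow
    return (uint32_t)(a / b);
}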
@@ -491,6 +567,13 @@ static void PPCInterpreter_CRANDC(PPCInterpreter_t* hCPU, uint32 opcode)
     PPCInterpreter_nextInstruction(hCPU);
 }

+static void PPCInterpreter_CRNAND(PPCInterpreter_t* hCPU, uint32 opcode)
+{
+    PPC_OPC_TEMPL_X_CR();
+    ppc_setCRBit(hCPU, crD, (ppc_getCRBit(hCPU, crA)&ppc_getCRBit(hCPU, crB)) ^ 1);
+    PPCInterpreter_nextInstruction(hCPU);
+}
+
 static void PPCInterpreter_CROR(PPCInterpreter_t* hCPU, uint32 opcode)
 {
     PPC_OPC_TEMPL_X_CR();

@@ -848,8 +931,7 @@ static void PPCInterpreter_CMP(PPCInterpreter_t* hCPU, uint32 opcode)
         hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
     else
         hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
-    if ((hCPU->spr.XER & XER_SO) != 0)
-        hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
+    hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
     PPCInterpreter_nextInstruction(hCPU);
 }

@@ -871,8 +953,7 @@ static void PPCInterpreter_CMPL(PPCInterpreter_t* hCPU, uint32 opcode)
         hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
     else
         hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
-    if ((hCPU->spr.XER & XER_SO) != 0)
-        hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
+    hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
     PPCInterpreter_nextInstruction(hCPU);
 }

@@ -895,8 +976,7 @@ static void PPCInterpreter_CMPI(PPCInterpreter_t* hCPU, uint32 opcode)
         hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
     else
         hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
-    if (hCPU->spr.XER & XER_SO)
-        hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
+    hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
     PPCInterpreter_nextInstruction(hCPU);
 }

@@ -919,8 +999,7 @@ static void PPCInterpreter_CMPLI(PPCInterpreter_t* hCPU, uint32 opcode)
         hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
     else
         hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
-    if (hCPU->spr.XER & XER_SO)
-        hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
+    hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
     PPCInterpreter_nextInstruction(hCPU);
 }
@@ -32,7 +32,7 @@ espresso_frsqrte_entry_t frsqrteLookupTable[32] =
     {0x20c1000, 0x35e},{0x1f12000, 0x332},{0x1d79000, 0x30a},{0x1bf4000, 0x2e6},
 };

-double frsqrte_espresso(double input)
+ATTR_MS_ABI double frsqrte_espresso(double input)
 {
     unsigned long long x = *(unsigned long long*)&input;

@@ -111,7 +111,7 @@ espresso_fres_entry_t fresLookupTable[32] =
     {0x88400, 0x11a}, {0x65000, 0x11a}, {0x41c00, 0x108}, {0x20c00, 0x106}
 };

-double fres_espresso(double input)
+ATTR_MS_ABI double fres_espresso(double input)
 {
     // based on testing we know that fres uses only the first 15 bits of the mantissa
     // seee eeee eeee mmmm mmmm mmmm mmmx xxxx .... (s = sign, e = exponent, m = mantissa, x = not used)
@@ -2,62 +2,70 @@
 #include "PPCInterpreterInternal.h"
 #include "PPCInterpreterHelper.h"

-std::unordered_set<std::string> sUnsupportedHLECalls;
+std::unordered_set<std::string> s_unsupportedHLECalls;

 void PPCInterpreter_handleUnsupportedHLECall(PPCInterpreter_t* hCPU)
 {
     const char* libFuncName = (char*)memory_getPointerFromVirtualOffset(hCPU->instructionPointer + 8);
     std::string tempString = fmt::format("Unsupported lib call: {}", libFuncName);
-    if (sUnsupportedHLECalls.find(tempString) == sUnsupportedHLECalls.end())
+    if (s_unsupportedHLECalls.find(tempString) == s_unsupportedHLECalls.end())
     {
         cemuLog_log(LogType::UnsupportedAPI, "{}", tempString);
-        sUnsupportedHLECalls.emplace(tempString);
+        s_unsupportedHLECalls.emplace(tempString);
     }
     hCPU->gpr[3] = 0;
     PPCInterpreter_nextInstruction(hCPU);
 }

-std::vector<void(*)(PPCInterpreter_t* hCPU)>* sPPCHLETable{};
+static constexpr size_t HLE_TABLE_CAPACITY = 0x4000;
+HLECALL s_ppcHleTable[HLE_TABLE_CAPACITY]{};
+sint32 s_ppcHleTableWriteIndex = 0;
+std::mutex s_ppcHleTableMutex;

 HLEIDX PPCInterpreter_registerHLECall(HLECALL hleCall, std::string hleName)
 {
-    if (!sPPCHLETable)
-        sPPCHLETable = new std::vector<void(*)(PPCInterpreter_t* hCPU)>();
-    for (sint32 i = 0; i < sPPCHLETable->size(); i++)
+    std::unique_lock _l(s_ppcHleTableMutex);
+    if (s_ppcHleTableWriteIndex >= HLE_TABLE_CAPACITY)
     {
-        if ((*sPPCHLETable)[i] == hleCall)
-            return i;
+        cemuLog_log(LogType::Force, "HLE table is full");
+        cemu_assert(false);
     }
-    HLEIDX newFuncIndex = (sint32)sPPCHLETable->size();
-    sPPCHLETable->resize(sPPCHLETable->size() + 1);
-    (*sPPCHLETable)[newFuncIndex] = hleCall;
-    return newFuncIndex;
+    for (sint32 i = 0; i < s_ppcHleTableWriteIndex; i++)
+    {
+        if (s_ppcHleTable[i] == hleCall)
+        {
+            return i;
+        }
+    }
+    cemu_assert(s_ppcHleTableWriteIndex < HLE_TABLE_CAPACITY);
+    s_ppcHleTable[s_ppcHleTableWriteIndex] = hleCall;
+    HLEIDX funcIndex = s_ppcHleTableWriteIndex;
+    s_ppcHleTableWriteIndex++;
+    return funcIndex;
 }

 HLECALL PPCInterpreter_getHLECall(HLEIDX funcIndex)
 {
-    if (funcIndex < 0 || funcIndex >= sPPCHLETable->size())
+    if (funcIndex < 0 || funcIndex >= HLE_TABLE_CAPACITY)
         return nullptr;
-    return sPPCHLETable->data()[funcIndex];
+    return s_ppcHleTable[funcIndex];
 }

-std::mutex g_hleLogMutex;
+std::mutex s_hleLogMutex;

 void PPCInterpreter_virtualHLE(PPCInterpreter_t* hCPU, unsigned int opcode)
 {
     uint32 hleFuncId = opcode & 0xFFFF;
-    if (hleFuncId == 0xFFD0)
+    if (hleFuncId == 0xFFD0) [[unlikely]]
     {
-        g_hleLogMutex.lock();
+        s_hleLogMutex.lock();
         PPCInterpreter_handleUnsupportedHLECall(hCPU);
-        g_hleLogMutex.unlock();
-        return;
+        s_hleLogMutex.unlock();
     }
     else
     {
         // os lib function
-        cemu_assert(hleFuncId < sPPCHLETable->size());
-        auto hleCall = (*sPPCHLETable)[hleFuncId];
+        auto hleCall = PPCInterpreter_getHLECall(hleFuncId);
         cemu_assert(hleCall);
         hleCall(hCPU);
     }
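The motivation for swapping the heap-allocated vector for a fixed-capacity array, as I read the diff: a vector that reallocates during registration can move entries while another thread is reading them, whereas a fixed table gives stable slots, so only the write path needs a mutex. A simplified sketch of the pattern (illustrative types and names, not Cemu's exact ones):

#include <cstddef>
#include <mutex>

// Fixed-capacity, register-once/read-forever function table. Writers
// serialize on a mutex; readers index the array without locking, since
// a slot is fully written before its index is handed out.
using Handler = void (*)();

constexpr std::size_t kCapacity = 0x4000;
Handler g_table[kCapacity]{};
int g_writeIndex = 0;
std::mutex g_writeMutex;

int registerHandler(Handler h)
{
    std::unique_lock lock(g_writeMutex);
    for (int i = 0; i < g_writeIndex; i++)
        if (g_table[i] == h)
            return i; // already registered, reuse the index
    g_table[g_writeIndex] = h;
    return g_writeIndex++;
}

Handler getHandler(int index)
{
    if (index < 0 || index >= (int)kCapacity)
        return nullptr;
    return g_table[index]; // lock-free read
}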
@@ -428,9 +428,6 @@ public:
 	}
 };
 
-uint32 testIP[100];
-uint32 testIPC = 0;
-
 template <typename ppcItpCtrl>
 class PPCInterpreterContainer
 {
@@ -466,6 +463,10 @@ public:
 	case 1: // virtual HLE
 		PPCInterpreter_virtualHLE(hCPU, opcode);
 		break;
+	case 3:
+		cemuLog_logDebug(LogType::Force, "Unsupported TWI instruction executed at {:08x}", hCPU->instructionPointer);
+		PPCInterpreter_nextInstruction(hCPU);
+		break;
 	case 4:
 		switch (PPC_getBits(opcode, 30, 5))
 		{
@@ -482,8 +483,9 @@ public:
 			PPCInterpreter_PS_CMPU1(hCPU, opcode);
 			break;
 		default:
-			debug_printf("Unknown execute %04X as [4->0] at %08X\n", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer);
+			cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [4->0] at {:08x}", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer);
+			cemu_assert_unimplemented();
 			hCPU->instructionPointer += 4;
 			break;
 		}
 		break;
@@ -509,8 +511,9 @@ public:
 			PPCInterpreter_PS_ABS(hCPU, opcode);
 			break;
 		default:
-			debug_printf("Unknown execute %04X as [4->8] at %08X\n", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer);
+			cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [4->8] at {:08x}", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer);
+			cemu_assert_unimplemented();
 			hCPU->instructionPointer += 4;
 			break;
 		}
 		break;
@@ -548,8 +551,9 @@ public:
 			PPCInterpreter_PS_MERGE11(hCPU, opcode);
 			break;
 		default:
-			debug_printf("Unknown execute %04X as [4->16] at %08X\n", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer);
-			debugBreakpoint();
+			cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [4->16] at {:08x}", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer);
+			cemu_assert_unimplemented();
+			hCPU->instructionPointer += 4;
 			break;
 		}
 		break;
@@ -590,8 +594,9 @@ public:
 			PPCInterpreter_PS_NMADD(hCPU, opcode);
 			break;
 		default:
-			debug_printf("Unknown execute %04X as [4] at %08X\n", PPC_getBits(opcode, 30, 5), hCPU->instructionPointer);
+			cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [4] at {:08x}", PPC_getBits(opcode, 30, 5), hCPU->instructionPointer);
+			cemu_assert_unimplemented();
 			hCPU->instructionPointer += 4;
 			break;
 		}
 		break;
@@ -623,12 +628,15 @@ public:
 		PPCInterpreter_BCX(hCPU, opcode);
 		break;
 	case 17:
-		if (PPC_getBits(opcode, 30, 1) == 1) {
+		if (PPC_getBits(opcode, 30, 1) == 1)
+		{
 			PPCInterpreter_SC(hCPU, opcode);
 		}
-		else {
-			debug_printf("Unsupported Opcode [0x17 --> 0x0]\n");
+		else
+		{
+			cemuLog_logDebug(LogType::Force, "Unsupported Opcode [0x17 --> 0x0]");
+			cemu_assert_unimplemented();
+			hCPU->instructionPointer += 4;
 		}
 		break;
 	case 18:
@@ -658,6 +666,9 @@ public:
 		case 193:
 			PPCInterpreter_CRXOR(hCPU, opcode);
 			break;
+		case 225:
+			PPCInterpreter_CRNAND(hCPU, opcode);
+			break;
 		case 257:
 			PPCInterpreter_CRAND(hCPU, opcode);
 			break;
@@ -674,8 +685,9 @@ public:
 			PPCInterpreter_BCCTR(hCPU, opcode);
 			break;
 		default:
-			debug_printf("Unknown execute %04X as [19] at %08X\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
+			cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [19] at {:08x}\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
+			cemu_assert_unimplemented();
			hCPU->instructionPointer += 4;
 			break;
 		}
 		break;
@@ -713,9 +725,6 @@ public:
 			PPCInterpreter_CMP(hCPU, opcode);
 			break;
 		case 4:
-#ifdef CEMU_DEBUG_ASSERT
-			debug_printf("TW instruction executed at %08x\n", hCPU->instructionPointer);
-#endif
 			PPCInterpreter_TW(hCPU, opcode);
 			break;
 		case 8:
@@ -895,6 +904,12 @@ public:
 		case 522:
 			PPCInterpreter_ADDCO(hCPU, opcode);
 			break;
+		case 523: // 11 | OE
+			PPCInterpreter_MULHWU_(hCPU, opcode); // OE is ignored
+			break;
+		case 533:
+			PPCInterpreter_LSWX(hCPU, opcode);
+			break;
 		case 534:
 			PPCInterpreter_LWBRX(hCPU, opcode);
 			break;
@@ -913,6 +928,9 @@ public:
 		case 567:
 			PPCInterpreter_LFSUX(hCPU, opcode);
 			break;
+		case 587: // 75 | OE
+			PPCInterpreter_MULHW_(hCPU, opcode); // OE is ignored for MULHW
+			break;
 		case 595:
 			PPCInterpreter_MFSR(hCPU, opcode);
 			break;
@@ -943,15 +961,30 @@ public:
 		case 663:
 			PPCInterpreter_STFSX(hCPU, opcode);
 			break;
+		case 661:
+			PPCInterpreter_STSWX(hCPU, opcode);
+			break;
 		case 695:
 			PPCInterpreter_STFSUX(hCPU, opcode);
 			break;
+		case 712: // 200 | OE
+			PPCInterpreter_SUBFZEO(hCPU, opcode);
+			break;
+		case 714: // 202 | OE
+			PPCInterpreter_ADDZEO(hCPU, opcode);
+			break;
 		case 725:
 			PPCInterpreter_STSWI(hCPU, opcode);
 			break;
 		case 727:
 			PPCInterpreter_STFDX(hCPU, opcode);
 			break;
+		case 744: // 232 | OE
+			PPCInterpreter_SUBFMEO(hCPU, opcode);
+			break;
+		case 746: // 234 | OE
+			PPCInterpreter_ADDMEO(hCPU, opcode);
+			break;
 		case 747:
			PPCInterpreter_MULLWO(hCPU, opcode);
 			break;
@@ -998,10 +1031,8 @@ public:
 			PPCInterpreter_DCBZ(hCPU, opcode);
 			break;
 		default:
-			debug_printf("Unknown execute %04X as [31] at %08X\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
-#ifdef CEMU_DEBUG_ASSERT
-			assert_dbg();
-#endif
+			cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [31] at {:08x}\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
+			cemu_assert_unimplemented();
 			hCPU->instructionPointer += 4;
 			break;
 		}
@@ -1084,7 +1115,7 @@ public:
 	case 57:
 		PPCInterpreter_PSQ_LU(hCPU, opcode);
 		break;
-	case 59: //Opcode category
+	case 59: // opcode category
 		switch (PPC_getBits(opcode, 30, 5))
 		{
 		case 18:
@@ -1115,8 +1146,9 @@ public:
 			PPCInterpreter_FNMADDS(hCPU, opcode);
 			break;
 		default:
-			debug_printf("Unknown execute %04X as [59] at %08X\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
+			cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [59] at {:08x}\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
+			cemu_assert_unimplemented();
 			hCPU->instructionPointer += 4;
 			break;
 		}
 		break;
@@ -1195,18 +1227,19 @@ public:
 		case 583:
 			PPCInterpreter_MFFS(hCPU, opcode);
 			break;
-		case 711: // IBM documentation has this wrong as 771?
+		case 711:
 			PPCInterpreter_MTFSF(hCPU, opcode);
 			break;
 		default:
-			debug_printf("Unknown execute %04X as [63] at %08X\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
+			cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [63] at {:08x}\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer);
+			cemu_assert_unimplemented();
 			PPCInterpreter_nextInstruction(hCPU);
 			break;
 		}
 	}
 	break;
 default:
-	debug_printf("Unknown execute %04X at %08X\n", PPC_getBits(opcode, 5, 6), (unsigned int)hCPU->instructionPointer);
+	cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} at {:08x}\n", PPC_getBits(opcode, 5, 6), (unsigned int)hCPU->instructionPointer);
 	cemu_assert_unimplemented();
 }
 }
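Throughout this dispatch switch the opcode fields are pulled out with PPC_getBits using PowerPC's big-endian bit numbering, where bit 0 is the most significant bit. A hedged sketch of what such a helper looks like; the semantics are inferred from the call sites above and the exact definition in Cemu's headers may differ:

	#include <cstdint>
	#include <cstdio>

	// Assumed semantics: extract 'count' bits ending at big-endian bit index 'msb'
	// (bit 0 = MSB of the 32-bit word), so PPC_getBits(opcode, 5, 6) yields the
	// 6-bit primary opcode and PPC_getBits(opcode, 30, 10) the extended opcode.
	static unsigned getBitsBE(uint32_t value, int msb, int count)
	{
		return (value >> (31 - msb)) & ((1u << count) - 1u);
	}

	int main()
	{
		uint32_t opcode = 0x7C0802A6; // mflr r0 (primary 31, extended 339)
		printf("primary=%u extended=%u\n", getBitsBE(opcode, 5, 6), getBitsBE(opcode, 30, 10));
	}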
@@ -50,9 +50,9 @@
 #define CR_BIT_EQ 2
 #define CR_BIT_SO 3
 
-#define XER_SO (1<<31) // summary overflow bit
-#define XER_OV (1<<30) // overflow bit
 #define XER_BIT_CA (29) // carry bit index. To accelerate frequent access, this bit is stored as a separate uint8
+#define XER_BIT_SO (31) // summary overflow, counterpart to CR SO
+#define XER_BIT_OV (30)
 
 // FPSCR
 #define FPSCR_VXSNAN (1<<24)
@@ -118,7 +118,8 @@
 
 static inline void ppc_update_cr0(PPCInterpreter_t* hCPU, uint32 r)
 {
-	hCPU->cr[CR_BIT_SO] = (hCPU->spr.XER&XER_SO) ? 1 : 0;
+	cemu_assert_debug(hCPU->xer_so <= 1);
+	hCPU->cr[CR_BIT_SO] = hCPU->xer_so;
 	hCPU->cr[CR_BIT_LT] = ((r != 0) ? 1 : 0) & ((r & 0x80000000) ? 1 : 0);
 	hCPU->cr[CR_BIT_EQ] = (r == 0);
 	hCPU->cr[CR_BIT_GT] = hCPU->cr[CR_BIT_EQ] ^ hCPU->cr[CR_BIT_LT] ^ 1; // this works because EQ and LT can never be set at the same time. So the only case where GT becomes 1 is when LT=0 and EQ=0
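The GT line relies on LT and EQ being mutually exclusive, so GT = EQ ^ LT ^ 1 is 1 exactly when both are 0. A quick exhaustive check of that identity, independent of the interpreter types:

	#include <cassert>

	int main()
	{
		// For cr0 after a compare of r against 0, LT and EQ can never both be 1,
		// so GT = EQ ^ LT ^ 1 is equivalent to !(EQ || LT).
		for (int eq = 0; eq <= 1; eq++)
			for (int lt = 0; lt <= 1; lt++)
			{
				if (eq && lt)
					continue; // impossible combination for cr0
				int gt = eq ^ lt ^ 1;
				assert(gt == (!eq && !lt));
			}
	}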
@@ -190,8 +191,8 @@ inline double roundTo25BitAccuracy(double d)
 	return *(double*)&v;
 }
 
-double fres_espresso(double input);
-double frsqrte_espresso(double input);
+ATTR_MS_ABI double fres_espresso(double input);
+ATTR_MS_ABI double frsqrte_espresso(double input);
 
 void fcmpu_espresso(PPCInterpreter_t* hCPU, int crfD, double a, double b);
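Pinning fres_espresso/frsqrte_espresso to one calling convention matters because the recompiler emits raw call instructions to them and must know where arguments and return values live regardless of the host platform's default ABI. A hedged sketch of how such an attribute macro is commonly defined; the actual definition in Cemu's platform headers may differ:

	// On Windows the MS x64 convention is already the default; elsewhere it has
	// to be requested explicitly so JIT-generated call sites stay compatible.
	#ifdef _MSC_VER
	#define ATTR_MS_ABI
	#else
	#define ATTR_MS_ABI __attribute__((ms_abi))
	#endif

	ATTR_MS_ABI double fres_espresso(double input); // declaration as in the header above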
@@ -31,7 +31,7 @@ static void PPCInterpreter_STW(PPCInterpreter_t* hCPU, uint32 Opcode)
 
 static void PPCInterpreter_STWU(PPCInterpreter_t* hCPU, uint32 Opcode)
 {
-	int rA, rS;
+	sint32 rA, rS;
 	uint32 imm;
 	PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm);
 	ppcItpCtrl::ppcMem_writeDataU32(hCPU, hCPU->gpr[rA] + imm, hCPU->gpr[rS]);
@@ -42,7 +42,7 @@ static void PPCInterpreter_STWU(PPCInterpreter_t* hCPU, uint32 Opcode)
 
 static void PPCInterpreter_STWX(PPCInterpreter_t* hCPU, uint32 Opcode)
 {
-	int rA, rS, rB;
+	sint32 rA, rS, rB;
 	PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
 	ppcItpCtrl::ppcMem_writeDataU32(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], hCPU->gpr[rS]);
 	PPCInterpreter_nextInstruction(hCPU);
@@ -85,7 +85,8 @@ static void PPCInterpreter_STWCX(PPCInterpreter_t* hCPU, uint32 Opcode)
 		ppc_setCRBit(hCPU, CR_BIT_GT, 0);
 		ppc_setCRBit(hCPU, CR_BIT_EQ, 1);
 	}
-	ppc_setCRBit(hCPU, CR_BIT_SO, (hCPU->spr.XER&XER_SO) != 0 ? 1 : 0);
+	cemu_assert_debug(hCPU->xer_so <= 1);
+	ppc_setCRBit(hCPU, CR_BIT_SO, hCPU->xer_so);
 	// remove reservation
 	hCPU->reservedMemAddr = 0;
 	hCPU->reservedMemValue = 0;
@@ -102,7 +103,7 @@ static void PPCInterpreter_STWCX(PPCInterpreter_t* hCPU, uint32 Opcode)
 
 static void PPCInterpreter_STWUX(PPCInterpreter_t* hCPU, uint32 Opcode)
 {
-	int rA, rS, rB;
+	sint32 rA, rS, rB;
 	PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
 	ppcItpCtrl::ppcMem_writeDataU32(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], hCPU->gpr[rS]);
 	if (rA)
@@ -112,7 +113,7 @@ static void PPCInterpreter_STWUX(PPCInterpreter_t* hCPU, uint32 Opcode)
 
 static void PPCInterpreter_STWBRX(PPCInterpreter_t* hCPU, uint32 Opcode)
 {
-	int rA, rS, rB;
+	sint32 rA, rS, rB;
 	PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
 	ppcItpCtrl::ppcMem_writeDataU32(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], _swapEndianU32(hCPU->gpr[rS]));
 	PPCInterpreter_nextInstruction(hCPU);
@@ -120,7 +121,7 @@ static void PPCInterpreter_STWBRX(PPCInterpreter_t* hCPU, uint32 Opcode)
 
 static void PPCInterpreter_STMW(PPCInterpreter_t* hCPU, uint32 Opcode)
 {
-	int rS, rA;
+	sint32 rS, rA;
 	uint32 imm;
 	PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm);
 	uint32 ea = (rA ? hCPU->gpr[rA] : 0) + imm;
@@ -135,7 +136,7 @@ static void PPCInterpreter_STMW(PPCInterpreter_t* hCPU, uint32 Opcode)
 
 static void PPCInterpreter_STH(PPCInterpreter_t* hCPU, uint32 Opcode)
 {
-	int rA, rS;
+	sint32 rA, rS;
 	uint32 imm;
 	PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm);
 	ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? hCPU->gpr[rA] : 0) + imm, (uint16)hCPU->gpr[rS]);
@@ -144,7 +145,7 @@ static void PPCInterpreter_STH(PPCInterpreter_t* hCPU, uint32 Opcode)
 
 static void PPCInterpreter_STHU(PPCInterpreter_t* hCPU, uint32 Opcode)
 {
-	int rA, rS;
+	sint32 rA, rS;
 	uint32 imm;
 	PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm);
 	ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? hCPU->gpr[rA] : 0) + imm, (uint16)hCPU->gpr[rS]);
@@ -155,7 +156,7 @@ static void PPCInterpreter_STHU(PPCInterpreter_t* hCPU, uint32 Opcode)
 
 static void PPCInterpreter_STHX(PPCInterpreter_t* hCPU, uint32 Opcode)
 {
-	int rA, rS, rB;
+	sint32 rA, rS, rB;
 	PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
 	ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], (uint16)hCPU->gpr[rS]);
 	PPCInterpreter_nextInstruction(hCPU);
@@ -163,7 +164,7 @@ static void PPCInterpreter_STHX(PPCInterpreter_t* hCPU, uint32 Opcode)
 
 static void PPCInterpreter_STHUX(PPCInterpreter_t* hCPU, uint32 Opcode)
 {
-	int rA, rS, rB;
+	sint32 rA, rS, rB;
 	PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
 	ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], (uint16)hCPU->gpr[rS]);
 	if (rA)
@@ -173,7 +174,7 @@ static void PPCInterpreter_STHUX(PPCInterpreter_t* hCPU, uint32 Opcode)
 
 static void PPCInterpreter_STHBRX(PPCInterpreter_t* hCPU, uint32 Opcode)
 {
-	int rA, rS, rB;
+	sint32 rA, rS, rB;
 	PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
 	ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], _swapEndianU16((uint16)hCPU->gpr[rS]));
 	PPCInterpreter_nextInstruction(hCPU);
@@ -181,7 +182,7 @@ static void PPCInterpreter_STHBRX(PPCInterpreter_t* hCPU, uint32 Opcode)
 
 static void PPCInterpreter_STB(PPCInterpreter_t* hCPU, uint32 Opcode)
 {
-	int rA, rS;
+	sint32 rA, rS;
 	uint32 imm;
 	PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm);
 	ppcItpCtrl::ppcMem_writeDataU8(hCPU, (rA ? hCPU->gpr[rA] : 0) + imm, (uint8)hCPU->gpr[rS]);
@@ -190,7 +191,7 @@ static void PPCInterpreter_STB(PPCInterpreter_t* hCPU, uint32 Opcode)
 
 static void PPCInterpreter_STBU(PPCInterpreter_t* hCPU, uint32 Opcode)
 {
-	int rA, rS;
+	sint32 rA, rS;
 	uint32 imm;
 	PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm);
 	ppcItpCtrl::ppcMem_writeDataU8(hCPU, hCPU->gpr[rA] + imm, (uint8)hCPU->gpr[rS]);
@@ -200,7 +201,7 @@ static void PPCInterpreter_STBU(PPCInterpreter_t* hCPU, uint32 Opcode)
 
 static void PPCInterpreter_STBX(PPCInterpreter_t* hCPU, uint32 Opcode)
 {
-	int rA, rS, rB;
+	sint32 rA, rS, rB;
 	PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
 	ppcItpCtrl::ppcMem_writeDataU8(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], (uint8)hCPU->gpr[rS]);
 	PPCInterpreter_nextInstruction(hCPU);
@@ -208,7 +209,7 @@ static void PPCInterpreter_STBX(PPCInterpreter_t* hCPU, uint32 Opcode)
 
 static void PPCInterpreter_STBUX(PPCInterpreter_t* hCPU, uint32 Opcode)
 {
-	int rA, rS, rB;
+	sint32 rA, rS, rB;
 	PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
 	ppcItpCtrl::ppcMem_writeDataU8(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], (uint8)hCPU->gpr[rS]);
 	if (rA)
@@ -218,7 +219,7 @@ static void PPCInterpreter_STBUX(PPCInterpreter_t* hCPU, uint32 Opcode)
 
 static void PPCInterpreter_STSWI(PPCInterpreter_t* hCPU, uint32 Opcode)
 {
-	int rA, rS, nb;
+	sint32 rA, rS, nb;
 	PPC_OPC_TEMPL_X(Opcode, rS, rA, nb);
 	if (nb == 0) nb = 32;
 	uint32 ea = rA ? hCPU->gpr[rA] : 0;
@@ -228,7 +229,39 @@ static void PPCInterpreter_STSWI(PPCInterpreter_t* hCPU, uint32 Opcode)
 	{
 		if (i == 0)
 		{
-			r = hCPU->gpr[rS];
+			r = rS < 32 ? hCPU->gpr[rS] : 0; // what happens if rS is out of bounds?
 			rS++;
 			rS %= 32;
 			i = 4;
 		}
 		ppcItpCtrl::ppcMem_writeDataU8(hCPU, ea, (r >> 24));
 		r <<= 8;
 		ea++;
 		i--;
 		nb--;
 	}
 	PPCInterpreter_nextInstruction(hCPU);
 }
 
+static void PPCInterpreter_STSWX(PPCInterpreter_t* hCPU, uint32 Opcode)
+{
+	sint32 rA, rS, rB;
+	PPC_OPC_TEMPL_X(Opcode, rS, rA, rB);
+	sint32 nb = hCPU->spr.XER&0x7F;
+	if (nb == 0)
+	{
+		PPCInterpreter_nextInstruction(hCPU);
+		return;
+	}
+	uint32 ea = rA ? hCPU->gpr[rA] : 0;
+	ea += hCPU->gpr[rB];
+	uint32 r = 0;
+	int i = 0;
+	while (nb > 0)
+	{
+		if (i == 0)
+		{
+			r = rS < 32 ? hCPU->gpr[rS] : 0; // what happens if rS is out of bounds?
+			rS++;
+			rS %= 32;
+			i = 4;
@@ -459,7 +492,6 @@ static void PPCInterpreter_LSWI(PPCInterpreter_t* hCPU, uint32 Opcode)
 	PPC_OPC_TEMPL_X(Opcode, rD, rA, nb);
 	if (nb == 0)
 		nb = 32;
-
 	uint32 ea = rA ? hCPU->gpr[rA] : 0;
 	uint32 r = 0;
 	int i = 4;
@@ -469,7 +501,8 @@ static void PPCInterpreter_LSWI(PPCInterpreter_t* hCPU, uint32 Opcode)
 		if (i == 0)
 		{
 			i = 4;
-			hCPU->gpr[rD] = r;
+			if(rD < 32)
+				hCPU->gpr[rD] = r;
 			rD++;
 			rD %= 32;
 			r = 0;
@@ -486,7 +519,52 @@ static void PPCInterpreter_LSWI(PPCInterpreter_t* hCPU, uint32 Opcode)
 		r <<= 8;
 		i--;
 	}
-	hCPU->gpr[rD] = r;
+	if(rD < 32)
+		hCPU->gpr[rD] = r;
 	PPCInterpreter_nextInstruction(hCPU);
 }
 
+static void PPCInterpreter_LSWX(PPCInterpreter_t* hCPU, uint32 Opcode)
+{
+	sint32 rA, rD, rB;
+	PPC_OPC_TEMPL_X(Opcode, rD, rA, rB);
+	// byte count comes from XER
+	uint32 nb = (hCPU->spr.XER>>0)&0x7F;
+	if (nb == 0)
+	{
+		PPCInterpreter_nextInstruction(hCPU);
+		return; // no-op
+	}
+	uint32 ea = rA ? hCPU->gpr[rA] : 0;
+	ea += hCPU->gpr[rB];
+	uint32 r = 0;
+	int i = 4;
+	uint8 v;
+	while (nb>0)
+	{
+		if (i == 0)
+		{
+			i = 4;
+			if(rD < 32)
+				hCPU->gpr[rD] = r;
+			rD++;
+			rD %= 32;
+			r = 0;
+		}
+		v = ppcItpCtrl::ppcMem_readDataU8(hCPU, ea);
+		r <<= 8;
+		r |= v;
+		ea++;
+		i--;
+		nb--;
+	}
+	while (i)
+	{
+		r <<= 8;
+		i--;
+	}
+	if(rD < 32)
+		hCPU->gpr[rD] = r;
+	PPCInterpreter_nextInstruction(hCPU);
+}
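The string load/store instructions above pack big-endian bytes into successive registers four at a time, left-justifying a trailing partial word and wrapping from r31 to r0; the new rD < 32 guards only defend against a transiently out-of-range register index inside that loop. A small illustration of the packing rule, written as plain C++ independent of the interpreter types:

	#include <cstdint>
	#include <cstdio>
	#include <vector>

	// Pack 'bytes' the way lsw* does: big-endian into 32-bit words,
	// with a short final group left-justified (low bits zero-filled).
	static std::vector<uint32_t> packStringWords(const std::vector<uint8_t>& bytes)
	{
		std::vector<uint32_t> words;
		uint32_t r = 0;
		int i = 4;
		for (uint8_t b : bytes)
		{
			if (i == 0) { words.push_back(r); r = 0; i = 4; }
			r = (r << 8) | b;
			i--;
		}
		while (i--) r <<= 8; // left-justify the last partial word
		words.push_back(r);
		return words;
	}

	int main()
	{
		auto w = packStringWords({0x41, 0x42, 0x43, 0x44, 0x45}); // "ABCDE"
		printf("%08X %08X\n", (unsigned)w[0], (unsigned)w[1]);    // 41424344 45000000
	}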
@@ -63,16 +63,25 @@ void PPCInterpreter_setDEC(PPCInterpreter_t* hCPU, uint32 newValue)
 uint32 PPCInterpreter_getXER(PPCInterpreter_t* hCPU)
 {
 	uint32 xerValue = hCPU->spr.XER;
-	xerValue &= ~(1<<XER_BIT_CA);
-	if( hCPU->xer_ca )
-		xerValue |= (1<<XER_BIT_CA);
+	xerValue &= ~(1 << XER_BIT_CA);
+	xerValue &= ~(1 << XER_BIT_SO);
+	xerValue &= ~(1 << XER_BIT_OV);
+	if (hCPU->xer_ca)
+		xerValue |= (1 << XER_BIT_CA);
+	if (hCPU->xer_so)
+		xerValue |= (1 << XER_BIT_SO);
+	if (hCPU->xer_ov)
+		xerValue |= (1 << XER_BIT_OV);
 	return xerValue;
 }
 
 void PPCInterpreter_setXER(PPCInterpreter_t* hCPU, uint32 v)
 {
-	hCPU->spr.XER = v;
-	hCPU->xer_ca = (v>>XER_BIT_CA)&1;
+	const uint32 XER_MASK = 0xE0FFFFFF; // some bits are masked out. Figure out which ones exactly
+	hCPU->spr.XER = v & XER_MASK;
+	hCPU->xer_ca = (v >> XER_BIT_CA) & 1;
+	hCPU->xer_so = (v >> XER_BIT_SO) & 1;
+	hCPU->xer_ov = (v >> XER_BIT_OV) & 1;
 }
 
 uint32 PPCInterpreter_getCoreIndex(PPCInterpreter_t* hCPU)
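SO, OV, and CA now live in separate uint8 fields and are spliced back into the architectural XER value only on demand, so the round-trip through setXER/getXER must be lossless for the bits that are kept. A self-contained check of that invariant under the same bit positions (31 = SO, 30 = OV, 29 = CA); the struct here is a stand-in, not Cemu's type:

	#include <cassert>
	#include <cstdint>

	constexpr int kBitSO = 31, kBitOV = 30, kBitCA = 29;

	struct XerState { uint32_t spr; uint8_t ca, so, ov; };

	static void setXER(XerState& s, uint32_t v)
	{
		s.spr = v & 0xE0FFFFFFu; // same mask as above; bits 24..28 are discarded
		s.ca = (v >> kBitCA) & 1;
		s.so = (v >> kBitSO) & 1;
		s.ov = (v >> kBitOV) & 1;
	}

	static uint32_t getXER(const XerState& s)
	{
		uint32_t v = s.spr & ~((1u << kBitSO) | (1u << kBitOV) | (1u << kBitCA));
		if (s.ca) v |= 1u << kBitCA;
		if (s.so) v |= 1u << kBitSO;
		if (s.ov) v |= 1u << kBitOV;
		return v;
	}

	int main()
	{
		XerState s{};
		setXER(s, 0xA0000042); // SO=1, CA=1, byte count 0x42
		assert(getXER(s) == 0xA0000042);
	}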
@@ -5,7 +5,6 @@
 #include "Cafe/OS/libs/coreinit/coreinit_CodeGen.h"
 
 #include "../Recompiler/PPCRecompiler.h"
-#include "../Recompiler/PPCRecompilerX64.h"
 
 #include <float.h>
 #include "Cafe/HW/Latte/Core/LatteBufferCache.h"
@@ -94,7 +93,6 @@ void PPCInterpreter_MTCRF(PPCInterpreter_t* hCPU, uint32 Opcode)
 {
 	// frequently used by GCC compiled code (e.g. SM64 port)
-	// tested
 
 	uint32 rS;
 	uint32 crfMask;
 	PPC_OPC_TEMPL_XFX(Opcode, rS, crfMask);
@@ -68,6 +68,8 @@ static void PPCInterpreter_TW(PPCInterpreter_t* hCPU, uint32 opcode)
 	PPC_OPC_TEMPL_X(opcode, to, rA, rB);
 
 	cemu_assert_debug(to == 0);
+	if(to != 0)
+		PPCInterpreter_nextInstruction(hCPU);
 
 	if (rA == DEBUGGER_BP_T_DEBUGGER)
 		debugger_enterTW(hCPU);
@@ -49,12 +49,12 @@ struct PPCInterpreter_t
 	uint32 fpscr;
 	uint8 cr[32]; // 0 -> bit not set, 1 -> bit set (upper 7 bits of each byte must always be zero) (cr0 starts at index 0, cr1 at index 4 ..)
 	uint8 xer_ca; // carry from xer
-	uint8 LSQE;
-	uint8 PSE;
+	uint8 xer_so;
+	uint8 xer_ov;
 	// thread remaining cycles
 	sint32 remainingCycles; // if this value goes below zero, the next thread is scheduled
 	sint32 skippedCycles; // number of skipped cycles
-	struct 
+	struct
 	{
 		uint32 LR;
 		uint32 CTR;
@@ -67,9 +67,10 @@ struct PPCInterpreter_t
 	uint32 reservedMemValue;
 	// temporary storage for recompiler
 	FPR_t temporaryFPR[8];
-	uint32 temporaryGPR[4];
+	uint32 temporaryGPR[4]; // deprecated, refactor backend dependency on this away
+	uint32 temporaryGPR_reg[4];
 	// values below this are not used by Cafe OS usermode
-	struct 
+	struct
 	{
 		uint32 fpecr; // is this the same register as fpscr ?
 		uint32 DEC;
@@ -84,7 +85,7 @@ struct PPCInterpreter_t
 	// DMA
 	uint32 dmaU;
 	uint32 dmaL;
-	// MMU 
+	// MMU
 	uint32 dbatU[8];
 	uint32 dbatL[8];
 	uint32 ibatU[8];
@@ -92,6 +93,8 @@ struct PPCInterpreter_t
 	uint32 sr[16];
 	uint32 sdr1;
 	}sprExtended;
+	uint8 LSQE;
+	uint8 PSE;
 	// global CPU values
 	PPCInterpreterGlobal_t* global;
 	// interpreter control
@@ -227,9 +230,9 @@ static inline float flushDenormalToZero(float f)
 
 // HLE interface
 
-typedef void(*HLECALL)(PPCInterpreter_t* hCPU);
+using HLECALL = void(*)(PPCInterpreter_t*);
+using HLEIDX = sint32;
 
-typedef sint32 HLEIDX;
 HLEIDX PPCInterpreter_registerHLECall(HLECALL hleCall, std::string hleName);
 HLECALL PPCInterpreter_getHLECall(HLEIDX funcIndex);
 
@@ -1,5 +1,4 @@
 #include "Cafe/HW/Espresso/Const.h"
-#include "asm/x64util.h"
 #include "config/ActiveSettings.h"
 #include "util/helpers/fspinlock.h"
 #include "util/highresolutiontimer/HighResolutionTimer.h"
src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp (new file, 1695 lines)
File diff suppressed because it is too large
@@ -0,0 +1,18 @@
#pragma once

#include "HW/Espresso/Recompiler/IML/IMLInstruction.h"
#include "../PPCRecompiler.h"

bool PPCRecompiler_generateAArch64Code(struct PPCRecFunction_t* PPCRecFunction, struct ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_cleanupAArch64Code(void* code, size_t size);

void PPCRecompilerAArch64Gen_generateRecompilerInterfaceFunctions();

// architecture specific constants
namespace IMLArchAArch64
{
	static constexpr int PHYSREG_GPR_BASE = 0;
	static constexpr int PHYSREG_GPR_COUNT = 25;
	static constexpr int PHYSREG_FPR_BASE = PHYSREG_GPR_COUNT;
	static constexpr int PHYSREG_FPR_COUNT = 31;
}; // namespace IMLArchAArch64
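These constants suggest the register allocator addresses both register classes through one flat physical-register index space: GPR ids 0..24 first, FPR ids following at PHYSREG_FPR_BASE. A hedged helper showing how such a flat index splits back into a class and an index within that class; this is illustrative only, since the actual mapping to AArch64 registers lives in the suppressed BackendAArch64.cpp:

	#include <cassert>

	namespace IMLArchAArch64
	{
		static constexpr int PHYSREG_GPR_BASE = 0;
		static constexpr int PHYSREG_GPR_COUNT = 25;
		static constexpr int PHYSREG_FPR_BASE = PHYSREG_GPR_COUNT;
		static constexpr int PHYSREG_FPR_COUNT = 31;
	}

	enum class RegClass { GPR, FPR };

	// Split a flat physical register index into (class, index within class).
	static RegClass classifyPhysReg(int physReg, int& indexInClass)
	{
		using namespace IMLArchAArch64;
		assert(physReg >= 0 && physReg < PHYSREG_FPR_BASE + PHYSREG_FPR_COUNT);
		if (physReg < PHYSREG_GPR_COUNT)
		{
			indexInClass = physReg - PHYSREG_GPR_BASE;
			return RegClass::GPR;
		}
		indexInClass = physReg - PHYSREG_FPR_BASE;
		return RegClass::FPR;
	}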
src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp (new file, 1672 lines)
File diff suppressed because it is too large
@@ -1,104 +1,56 @@
+#include "../PPCRecompiler.h" // todo - get rid of dependency
 
-typedef struct
+#include "x86Emitter.h"
+
+struct x64RelocEntry_t
 {
+	x64RelocEntry_t(uint32 offset, void* extraInfo) : offset(offset), extraInfo(extraInfo) {};
+
 	uint32 offset;
-	uint8 type;
 	void* extraInfo;
-}x64RelocEntry_t;
+};
 
-typedef struct
+struct x64GenContext_t
 {
-	uint8* codeBuffer;
-	sint32 codeBufferIndex;
-	sint32 codeBufferSize;
-	// cr state
-	sint32 activeCRRegister; // current x86 condition flags reflect this cr* register
-	sint32 activeCRState; // describes the way in which x86 flags map to the cr register (signed / unsigned)
+	IMLSegment* currentSegment{};
+	x86Assembler64* emitter;
+	sint32 m_currentInstructionEmitIndex;
+
+	x64GenContext_t()
+	{
+		emitter = new x86Assembler64();
+	}
+
+	~x64GenContext_t()
+	{
+		delete emitter;
+	}
+
+	IMLInstruction* GetNextInstruction(sint32 relativeIndex = 1)
+	{
+		sint32 index = m_currentInstructionEmitIndex + relativeIndex;
+		if(index < 0 || index >= (sint32)currentSegment->imlList.size())
+			return nullptr;
+		return currentSegment->imlList.data() + index;
+	}
+
 	// relocate offsets
-	x64RelocEntry_t* relocateOffsetTable;
-	sint32 relocateOffsetTableSize;
-	sint32 relocateOffsetTableCount;
-}x64GenContext_t;
-
-// Some of these are defined by winnt.h and gnu headers
-#undef REG_EAX
-#undef REG_ECX
-#undef REG_EDX
-#undef REG_EBX
-#undef REG_ESP
-#undef REG_EBP
-#undef REG_ESI
-#undef REG_EDI
-#undef REG_NONE
-#undef REG_RAX
-#undef REG_RCX
-#undef REG_RDX
-#undef REG_RBX
-#undef REG_RSP
-#undef REG_RBP
-#undef REG_RSI
-#undef REG_RDI
-#undef REG_R8
-#undef REG_R9
-#undef REG_R10
-#undef REG_R11
-#undef REG_R12
-#undef REG_R13
-#undef REG_R14
-#undef REG_R15
-
-#define REG_EAX 0
-#define REG_ECX 1
-#define REG_EDX 2
-#define REG_EBX 3
-#define REG_ESP 4 // reserved for low half of hCPU pointer
-#define REG_EBP 5
-#define REG_ESI 6
-#define REG_EDI 7
-#define REG_NONE -1
-
-#define REG_RAX 0
-#define REG_RCX 1
-#define REG_RDX 2
-#define REG_RBX 3
-#define REG_RSP 4 // reserved for hCPU pointer
-#define REG_RBP 5
-#define REG_RSI 6
-#define REG_RDI 7
-#define REG_R8 8
-#define REG_R9 9
-#define REG_R10 10
-#define REG_R11 11
-#define REG_R12 12
-#define REG_R13 13 // reserved to hold pointer to memory base? (Not decided yet)
-#define REG_R14 14 // reserved as temporary register
-#define REG_R15 15 // reserved for pointer to ppcRecompilerInstanceData
-
-#define REG_AL 0
-#define REG_CL 1
-#define REG_DL 2
-#define REG_BL 3
-#define REG_AH 4
-#define REG_CH 5
-#define REG_DH 6
-#define REG_BH 7
+	std::vector<x64RelocEntry_t> relocateOffsetTable2;
+};
 
 // reserved registers
-#define REG_RESV_TEMP (REG_R14)
-#define REG_RESV_HCPU (REG_RSP)
-#define REG_RESV_MEMBASE (REG_R13)
-#define REG_RESV_RECDATA (REG_R15)
+#define REG_RESV_TEMP (X86_REG_R14)
+#define REG_RESV_HCPU (X86_REG_RSP)
+#define REG_RESV_MEMBASE (X86_REG_R13)
+#define REG_RESV_RECDATA (X86_REG_R15)
 
 // reserved floating-point registers
 #define REG_RESV_FPR_TEMP (15)
 
+#define reg32ToReg16(__x) (__x) // deprecated
 
-extern sint32 x64Gen_registerMap[12];
-
-#define tempToRealRegister(__x) (x64Gen_registerMap[__x])
-#define tempToRealFPRRegister(__x) (__x)
-#define reg32ToReg16(__x) (__x)
-
+// deprecated condition flags
 enum
 {
 	X86_CONDITION_EQUAL, // or zero
@@ -119,36 +71,23 @@ enum
 	X86_CONDITION_NONE, // no condition, jump always
 };
 
-#define PPCREC_CR_TEMPORARY (8) // never stored
-#define PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC (0) // for signed arithmetic operations (ADD, CMPI)
-#define PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC (1) // for unsigned arithmetic operations (ADD, CMPI)
-#define PPCREC_CR_STATE_TYPE_LOGICAL (2) // for unsigned operations (CMPLI)
-
-#define X86_RELOC_MAKE_RELATIVE (0) // make code imm relative to instruction
-#define X64_RELOC_LINK_TO_PPC (1) // translate from ppc address to x86 offset
-#define X64_RELOC_LINK_TO_SEGMENT (2) // link to beginning of segment
-
-#define PPC_X64_GPR_USABLE_REGISTERS (16-4)
-#define PPC_X64_FPR_USABLE_REGISTERS (16-1) // Use XMM0 - XMM14, XMM15 is the temp register
-
-
-bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext);
-
-void PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext);
+bool PPCRecompiler_generateX64Code(struct PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext);
 
 void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset);
 
 void PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions();
 
-void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
-void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
-bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed);
-bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed);
+void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
+void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
+bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed);
+bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed);
 
-void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
-void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
-void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
-void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction);
+void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
+void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
+void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
+void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
+
+void PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
 
 // ASM gen
 void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v);
@@ -196,9 +135,6 @@ void x64Gen_or_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstReg
 void x64Gen_and_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32);
 void x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32);
 
-void x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
-void x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegister64, sint32 memImmS32, sint32 srcRegister);
-
 void x64Gen_add_reg64_reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
 void x64Gen_add_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
 void x64Gen_add_reg64_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
@@ -207,9 +143,6 @@ void x64Gen_sub_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 des
 void x64Gen_sub_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
 void x64Gen_sub_reg64_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
 void x64Gen_sub_mem32reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegister, sint32 memImmS32, uint64 immU32);
-void x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
-void x64Gen_adc_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
-void x64Gen_adc_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32);
 void x64Gen_dec_mem32(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint32 memoryImmU32);
 void x64Gen_imul_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 operandRegister);
 void x64Gen_idiv_reg64Low32(x64GenContext_t* x64GenContext, sint32 operandRegister);
@@ -241,9 +174,7 @@ void x64Gen_not_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister);
 void x64Gen_neg_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister);
 void x64Gen_cdq(x64GenContext_t* x64GenContext);
 
-void x64Gen_bswap_reg64(x64GenContext_t* x64GenContext, sint32 destRegister);
 void x64Gen_bswap_reg64Lower32bit(x64GenContext_t* x64GenContext, sint32 destRegister);
-void x64Gen_bswap_reg64Lower16bit(x64GenContext_t* x64GenContext, sint32 destRegister);
 
 void x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
 void x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister);
@@ -274,6 +205,7 @@ void x64Gen_movddup_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi
 void x64Gen_movhlps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
 void x64Gen_movsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
 void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
+void x64Gen_movsd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
 void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
 void x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
 void x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
@@ -299,6 +231,7 @@ void x64Gen_andps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegist
 void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32);
 void x64Gen_cvttpd2dq_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
 void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc);
+void x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc);
 void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
 void x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
 void x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
@@ -329,4 +262,8 @@ void x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext_t* x64G
 void x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
 
 void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
-void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
+void x64Gen_shrx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
+void x64Gen_sarx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
+void x64Gen_sarx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
+void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
+void x64Gen_shlx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
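The reworked x64GenContext_t owns its emitter through a raw pointer and a manual new/delete pair, which is safe here because the context is neither copied nor moved. The same ownership can be stated more defensively; a hedged alternative sketch, not the repo's code:

	#include <memory>

	struct Emitter { /* stands in for x86Assembler64 */ };

	struct GenContext
	{
		// Same lifetime as the manual new/delete pair in x64GenContext_t,
		// but copy construction (which would double-free) is implicitly deleted.
		std::unique_ptr<Emitter> emitter = std::make_unique<Emitter>();
	};

	int main() { GenContext ctx; }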
@@ -1,5 +1,4 @@
-#include "PPCRecompiler.h"
-#include "PPCRecompilerX64.h"
+#include "BackendX64.h"
 
 void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
 
@@ -21,11 +20,10 @@ void _x64Gen_vex128_nds(x64GenContext_t* x64GenContext, uint8 opcodeMap, uint8 a
 	x64Gen_writeU8(x64GenContext, opcode);
 }
 
-#define VEX_PP_0F 0 // guessed
+#define VEX_PP_0F 0
 #define VEX_PP_66_0F 1
-#define VEX_PP_F3_0F 2 // guessed
-#define VEX_PP_F2_0F 3 // guessed
-
+#define VEX_PP_F3_0F 2
+#define VEX_PP_F2_0F 3
 
 void x64Gen_avx_VPUNPCKHQDQ_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB)
 {
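Dropping the "guessed" comments is justified: in VEX encoding the 2-bit pp field stands in for a legacy mandatory prefix, with 0 = none, 1 = 66, 2 = F3, and 3 = F2, so these constants match the specification. Restated as an enum for reference; this block is illustrative, not part of the source:

	// VEX.pp field values and the legacy SIMD prefix each one replaces.
	enum VexPP : unsigned
	{
		VEX_PP_NONE = 0, // no prefix
		VEX_PP_66   = 1, // 66 prefix
		VEX_PP_F3   = 2, // F3 prefix
		VEX_PP_F2   = 3, // F2 prefix
	};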
@@ -1,5 +1,4 @@
-#include "PPCRecompiler.h"
-#include "PPCRecompilerX64.h"
+#include "BackendX64.h"
 
 void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32);
 
@@ -69,6 +68,34 @@ void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 regist
 	x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7));
 }
 
+void x64Gen_shrx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
+{
+	x64Gen_writeU8(x64GenContext, 0xC4);
+	x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 0x20 : 0));
+	x64Gen_writeU8(x64GenContext, 0x7B - registerB * 8);
+	x64Gen_writeU8(x64GenContext, 0xF7);
+	x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7));
+}
+
+void x64Gen_sarx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
+{
+	// SARX reg64, reg64, reg64
+	x64Gen_writeU8(x64GenContext, 0xC4);
+	x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 0x20 : 0));
+	x64Gen_writeU8(x64GenContext, 0xFA - registerB * 8);
+	x64Gen_writeU8(x64GenContext, 0xF7);
+	x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7));
+}
+
+void x64Gen_sarx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
+{
+	x64Gen_writeU8(x64GenContext, 0xC4);
+	x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 0x20 : 0));
+	x64Gen_writeU8(x64GenContext, 0x7A - registerB * 8);
+	x64Gen_writeU8(x64GenContext, 0xF7);
+	x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7));
+}
+
 void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
 {
 	// SHLX reg64, reg64, reg64
@@ -77,4 +104,13 @@ void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 regist
 	x64Gen_writeU8(x64GenContext, 0xF9 - registerB * 8);
 	x64Gen_writeU8(x64GenContext, 0xF7);
 	x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7));
 }
+
+void x64Gen_shlx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
+{
+	x64Gen_writeU8(x64GenContext, 0xC4);
+	x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 0x20 : 0));
+	x64Gen_writeU8(x64GenContext, 0x79 - registerB * 8);
+	x64Gen_writeU8(x64GenContext, 0xF7);
+	x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7));
+}
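These emitters hand-assemble a three-byte VEX prefix: 0xC4, then inverted R/X/B bits plus the 0F38 map selector (base 0xE2, with bit 7 cleared when the destination needs REX.R and bit 5 when the r/m operand needs REX.B), then W.vvvv.L.pp where the shift-count register goes into the inverted vvvv field and pp selects the implied prefix (66 = SHLX, F3 = SARX, F2 = SHRX); opcode 0xF7 and a register-direct ModRM byte follow. A commented decode of one concrete encoding under that byte layout:

	#include <cstdint>
	#include <cstdio>

	int main()
	{
		// SHRX eax, ecx, edx == C4 E2 6B F7 C1 (registerDst=0, registerA=1, registerB=2)
		uint8_t byte1 = 0xE2;         // R=1 X=1 B=1 (inverted, no REX bits), mmmmm=00010 -> 0F38 map
		uint8_t byte2 = 0x7B - 2 * 8; // W=0, vvvv=~2 (edx as shift count), L=0, pp=11 -> F2 (SHRX)
		printf("map=%d vvvv=%d pp=%d\n",
		       byte1 & 0x1F,              // 2 (0F38)
		       (~(byte2 >> 3)) & 0xF,     // 2 (edx)
		       byte2 & 0x3);              // 3 (F2 prefix)
	}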
src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp (new file, 469 lines)
@ -0,0 +1,469 @@
|
|||
#include "../PPCRecompiler.h"
|
||||
#include "../IML/IML.h"
|
||||
#include "BackendX64.h"
|
||||
#include "Common/cpu_features.h"
|
||||
|
||||
uint32 _regF64(IMLReg physReg);
|
||||
|
||||
uint32 _regI32(IMLReg r)
|
||||
{
|
||||
cemu_assert_debug(r.GetRegFormat() == IMLRegFormat::I32);
|
||||
return (uint32)r.GetRegID();
|
||||
}
|
||||
|
||||
static x86Assembler64::GPR32 _reg32(sint8 physRegId)
|
||||
{
|
||||
return (x86Assembler64::GPR32)physRegId;
|
||||
}
|
||||
|
||||
static x86Assembler64::GPR8_REX _reg8(IMLReg r)
|
||||
{
|
||||
cemu_assert_debug(r.GetRegFormat() == IMLRegFormat::I32); // currently bool regs are implemented as 32bit registers
|
||||
return (x86Assembler64::GPR8_REX)r.GetRegID();
|
||||
}
|
||||
|
||||
static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId)
|
||||
{
|
||||
return (x86Assembler64::GPR32)regId;
|
||||
}
|
||||
|
||||
static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId)
|
||||
{
|
||||
return (x86Assembler64::GPR8_REX)regId;
|
||||
}
|
||||
|
||||
// load from memory
|
||||
bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
|
||||
{
|
||||
sint32 realRegisterXMM = _regF64(imlInstruction->op_storeLoad.registerData);
|
||||
sint32 realRegisterMem = _regI32(imlInstruction->op_storeLoad.registerMem);
|
||||
sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
|
||||
if( indexed )
|
||||
realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2);
|
||||
uint8 mode = imlInstruction->op_storeLoad.mode;
|
||||
|
||||
if( mode == PPCREC_FPR_LD_MODE_SINGLE )
|
||||
{
|
||||
// load byte swapped single into temporary FPR
|
||||
if( indexed )
|
||||
{
|
||||
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
|
||||
if(g_CPUFeatures.x86.movbe)
|
||||
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
|
||||
else
|
||||
x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(g_CPUFeatures.x86.movbe)
|
||||
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
else
|
||||
x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
}
|
||||
if(g_CPUFeatures.x86.movbe == false )
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
|
||||
x64Gen_movd_xmmReg_reg64Low32(x64GenContext, realRegisterXMM, REG_RESV_TEMP);
|
||||
|
||||
if (imlInstruction->op_storeLoad.flags2.notExpanded)
|
||||
{
|
||||
// leave value as single
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, realRegisterXMM);
|
||||
}
|
||||
}
|
||||
else if( mode == PPCREC_FPR_LD_MODE_DOUBLE )
|
||||
{
|
||||
if( g_CPUFeatures.x86.avx )
|
||||
{
|
||||
if( indexed )
|
||||
{
|
||||
// calculate offset
|
||||
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
|
||||
// load value
|
||||
x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0);
|
||||
x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP);
|
||||
x64Gen_movq_xmmReg_reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP);
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_FPR_TEMP);
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+0);
|
||||
x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP);
|
||||
x64Gen_movq_xmmReg_reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP);
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_FPR_TEMP);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if( indexed )
|
||||
{
|
||||
// calculate offset
|
||||
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
|
||||
// load double low part to temporaryFPR
|
||||
x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0);
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
|
||||
x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP);
|
||||
// calculate offset again
|
||||
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
|
||||
// load double high part to temporaryFPR
|
||||
x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+4);
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
|
||||
x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP);
|
||||
// load double from temporaryFPR
|
||||
x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR));
|
||||
}
|
||||
else
|
||||
{
|
||||
// load double low part to temporaryFPR
|
||||
x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+0);
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
|
||||
x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP);
|
||||
// load double high part to temporaryFPR
|
||||
x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+4);
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
|
||||
x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP);
|
||||
// load double from temporaryFPR
|
||||
x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// store to memory
|
||||
bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
|
||||
{
|
||||
sint32 realRegisterXMM = _regF64(imlInstruction->op_storeLoad.registerData);
|
||||
sint32 realRegisterMem = _regI32(imlInstruction->op_storeLoad.registerMem);
|
||||
sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
|
||||
if( indexed )
|
||||
realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2);
|
||||
uint8 mode = imlInstruction->op_storeLoad.mode;
|
||||
if( mode == PPCREC_FPR_ST_MODE_SINGLE )
|
||||
{
|
||||
if (imlInstruction->op_storeLoad.flags2.notExpanded)
|
||||
{
|
||||
// value is already in single format
|
||||
x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM);
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, realRegisterXMM);
|
||||
x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP);
|
||||
}
|
||||
if(g_CPUFeatures.x86.movbe == false )
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
|
||||
if( indexed )
|
||||
{
|
||||
if( realRegisterMem == realRegisterMem2 )
|
||||
assert_dbg();
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
if(g_CPUFeatures.x86.movbe)
|
||||
x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
|
||||
else
|
||||
x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
|
||||
if( indexed )
|
||||
{
|
||||
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
}
|
||||
else if( mode == PPCREC_FPR_ST_MODE_DOUBLE )
|
||||
{
|
||||
if( indexed )
|
||||
{
|
||||
if( realRegisterMem == realRegisterMem2 )
|
||||
assert_dbg();
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
x64Gen_movsd_memReg64_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR));
|
||||
// store double low part
|
||||
x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+0);
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
|
||||
x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+4, REG_RESV_TEMP);
|
||||
// store double high part
|
||||
x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+4);
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
|
||||
x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+0, REG_RESV_TEMP);
|
||||
if( indexed )
|
||||
{
|
||||
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
}
|
||||
else if( mode == PPCREC_FPR_ST_MODE_UI32_FROM_PS0 )
|
||||
{
|
||||
x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM);
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
|
||||
if( indexed )
|
||||
{
|
||||
cemu_assert_debug(realRegisterMem == realRegisterMem2);
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
|
||||
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
debug_printf("PPCRecompilerX64Gen_imlInstruction_fpr_store(): Unsupported mode %d\n", mode);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
	// FPR op FPR
	void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
	{
		if( imlInstruction->operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT )
		{
			uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regR);
			uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regA);
			x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, regGpr, regFpr);
			return;
		}
		else if( imlInstruction->operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT )
		{
			uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regR);
			uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regA);
			x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext, regFpr, regGpr);
			return;
		}
		else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
		{
			cemu_assert_debug(imlInstruction->op_fpr_r_r.regR.GetRegFormat() == IMLRegFormat::F64); // assuming target is always F64 for now
			cemu_assert_debug(imlInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::I32); // supporting only 32bit floats as input for now
			// exact operation depends on size of types. Floats are automatically promoted to double if the target is F64
			uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regR);
			if (imlInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::I32)
			{
				uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regA);
				x64Gen_movq_xmmReg_reg64(x64GenContext, regFpr, regGpr); // using reg32 as reg64 param here is ok. We'll refactor later
				// float to double
				x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regFpr, regFpr);
			}
			else
			{
				cemu_assert_unimplemented();
			}
			return;
		}

		uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR);
		uint32 regA = _regF64(imlInstruction->op_fpr_r_r.regA);
		if( imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN )
		{
			x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA);
		}
		else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY )
		{
			x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regA);
		}
		else if( imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE )
		{
			x64Gen_divsd_xmmReg_xmmReg(x64GenContext, regR, regA);
		}
		else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD )
		{
			x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regA);
		}
		else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB )
		{
			x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regA);
		}
		else if( imlInstruction->operation == PPCREC_IML_OP_FPR_FCTIWZ )
		{
			x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, regA);
			x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
			// move to FPR register
			x64Gen_movq_xmmReg_reg64(x64GenContext, regR, REG_RESV_TEMP);
		}
		else
		{
			assert_dbg();
		}
	}
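	// Conversion semantics used above (reference, Intel SDM): CVTTSD2SI truncates
	// toward zero, i.e. it behaves like a C cast (sint32)someDouble; CVTSI2SD is the
	// exact widening conversion (double)someSint32. The FCTIWZ path additionally
	// zero-extends the truncated 32-bit result before moving it back into an XMM
	// register with MOVQ.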

	/*
	 * FPR = op (fprA, fprB)
	 */
	void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
	{
		uint32 regR = _regF64(imlInstruction->op_fpr_r_r_r.regR);
		uint32 regA = _regF64(imlInstruction->op_fpr_r_r_r.regA);
		uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r.regB);

		if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY)
		{
			if (regR == regA)
			{
				x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regB);
			}
			else if (regR == regB)
			{
				x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regA);
			}
			else
			{
				x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA);
				x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regB);
			}
		}
		else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD)
		{
			// todo: Use AVX 3-operand VADDSD if available
			if (regR == regA)
			{
				x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regB);
			}
			else if (regR == regB)
			{
				x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regA);
			}
			else
			{
				x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA);
				x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regB);
			}
		}
		else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB )
		{
			if( regR == regA )
			{
				x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regB);
			}
			else if( regR == regB )
			{
				x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
				x64Gen_subsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB);
				x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
			}
			else
			{
				x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA);
				x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regB);
			}
		}
		else
			assert_dbg();
	}
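	// Operand-aliasing note for the three-register ops above: SSE scalar arithmetic
	// is destructive (dst = dst op src). ADD/MUL are commutative, so regR == regB is
	// solved by swapping operands; SUB is not, which is why regR == regB computes the
	// result in REG_RESV_FPR_TEMP first. Emitted FSUB sequences (illustration):
	//   regR==regA:  SUBSD xmmR, xmmB
	//   regR==regB:  MOVSD xmmT, xmmA; SUBSD xmmT, xmmB; MOVSD xmmR, xmmT
	//   otherwise:   MOVSD xmmR, xmmA; SUBSD xmmR, xmmB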

	/*
	 * FPR = op (fprA, fprB, fprC)
	 */
	void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
	{
		uint32 regR = _regF64(imlInstruction->op_fpr_r_r_r_r.regR);
		uint32 regA = _regF64(imlInstruction->op_fpr_r_r_r_r.regA);
		uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r_r.regB);
		uint32 regC = _regF64(imlInstruction->op_fpr_r_r_r_r.regC);

		if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT )
		{
			x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
			sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
			x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0);
			// select C
			x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regC);
			sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex();
			x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
			// select B
			PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
			x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regB);
			// end
			PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
		}
		else
			assert_dbg();
	}
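	// FPR_SELECT above implements fsel-style semantics (illustration):
	//   regR = (regA >= 0.0) ? regC : regB;
	// COMISD against 0.0 sets CF when regA < 0.0 and also when regA is NaN
	// (unordered), so the UNSIGNED_BELOW branch routes negative and NaN inputs
	// to regB alike.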

	void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
	{
		uint32 regR = _regF64(imlInstruction->op_fpr_r.regR);

		if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE )
		{
			x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom));
		}
		else if( imlInstruction->operation == PPCREC_IML_OP_FPR_LOAD_ONE )
		{
			x64Gen_movsd_xmmReg_memReg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble1_1));
		}
		else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS )
		{
			x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom));
		}
		else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS )
		{
			x64Gen_orps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom));
		}
		else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM )
		{
			// convert to 32bit single
			x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, regR, regR);
			// convert back to 64bit double
			x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR);
		}
		else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64)
		{
			// convert bottom to 64bit double
			x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR);
		}
		else
		{
			cemu_assert_unimplemented();
		}
	}
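	// The unary FPR ops above rely on IEEE-754 sign-bit masking (reference):
	//   NEGATE        -> XORPS with 0x8000000000000000 (flip sign bit)
	//   ABS           -> ANDPS with 0x7FFFFFFFFFFFFFFF (clear sign bit)
	//   NEGATIVE_ABS  -> ORPS  with 0x8000000000000000 (force sign bit)
	// The exact mask values are assumed to be what the _x64XMM_* instance-data
	// fields referenced above contain.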

	void PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
	{
		auto regR = _reg8(imlInstruction->op_fpr_compare.regR);
		auto regA = _regF64(imlInstruction->op_fpr_compare.regA);
		auto regB = _regF64(imlInstruction->op_fpr_compare.regB);

		x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR));
		x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, regA, regB);

		if (imlInstruction->op_fpr_compare.cond == IMLCondition::UNORDERED_GT)
		{
			// GT case can be covered with a single SETnbe which checks CF==0 && ZF==0 (unordered sets both)
			x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_NBE, regR);
			return;
		}
		else if (imlInstruction->op_fpr_compare.cond == IMLCondition::UNORDERED_U)
		{
			// unordered case can be checked via PF
			x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_PE, regR);
			return;
		}

		// remember unordered state
		auto regTmp = _reg32_from_reg8(_reg32(REG_RESV_TEMP));
		x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_PO, regTmp); // by reversing the parity we can avoid having to XOR the value for masking the LT/EQ conditions

		X86Cond x86Cond;
		switch (imlInstruction->op_fpr_compare.cond)
		{
		case IMLCondition::UNORDERED_LT:
			x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_B, regR);
			break;
		case IMLCondition::UNORDERED_EQ:
			x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_Z, regR);
			break;
		default:
			cemu_assert_unimplemented();
		}
		x64GenContext->emitter->AND_bb(_reg8_from_reg32(regR), _reg8_from_reg32(regTmp)); // if unordered (PF=1) then force LT/GT/EQ to zero
	}
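	// UCOMISD flag mapping (Intel SDM) that the SETcc choices above depend on:
	//   regA >  regB : ZF=0 PF=0 CF=0  -> SETNBE
	//   regA <  regB : ZF=0 PF=0 CF=1  -> SETB
	//   regA == regB : ZF=1 PF=0 CF=0  -> SETZ
	//   unordered    : ZF=1 PF=1 CF=1  -> SETPE detects it; the SETPO+AND masks
	//                  the LT/EQ results to 0 when either input is NaN.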
@@ -1,62 +1,31 @@
#include "PPCRecompiler.h"
#include "PPCRecompilerIml.h"
-#include "PPCRecompilerX64.h"
+#include "BackendX64.h"

// x86/x64 extension opcodes that could be useful:
// ANDN
// mulx, rorx, sarx, shlx, shrx
// PDEP, PEXT

void x64Gen_checkBuffer(x64GenContext_t* x64GenContext)
{
	// todo
}

void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v)
{
-	if( x64GenContext->codeBufferIndex+1 > x64GenContext->codeBufferSize )
-	{
-		x64GenContext->codeBufferSize *= 2;
-		x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize);
-	}
-	*(uint8*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v;
-	x64GenContext->codeBufferIndex++;
+	x64GenContext->emitter->_emitU8(v);
}

void x64Gen_writeU16(x64GenContext_t* x64GenContext, uint32 v)
{
-	if( x64GenContext->codeBufferIndex+2 > x64GenContext->codeBufferSize )
-	{
-		x64GenContext->codeBufferSize *= 2;
-		x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize);
-	}
-	*(uint16*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v;
-	x64GenContext->codeBufferIndex += 2;
+	x64GenContext->emitter->_emitU16(v);
}

void x64Gen_writeU32(x64GenContext_t* x64GenContext, uint32 v)
{
-	if( x64GenContext->codeBufferIndex+4 > x64GenContext->codeBufferSize )
-	{
-		x64GenContext->codeBufferSize *= 2;
-		x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize);
-	}
-	*(uint32*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v;
-	x64GenContext->codeBufferIndex += 4;
+	x64GenContext->emitter->_emitU32(v);
}

void x64Gen_writeU64(x64GenContext_t* x64GenContext, uint64 v)
{
-	if( x64GenContext->codeBufferIndex+8 > x64GenContext->codeBufferSize )
-	{
-		x64GenContext->codeBufferSize *= 2;
-		x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize);
-	}
-	*(uint64*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v;
-	x64GenContext->codeBufferIndex += 8;
+	x64GenContext->emitter->_emitU64(v);
}

-#include "x64Emit.hpp"
+#include "X64Emit.hpp"

void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32)
{
@@ -67,7 +36,7 @@ void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataReg
		forceUseOffset = true;
	}

-	if (memRegisterB64 == REG_NONE)
+	if (memRegisterB64 == X86_REG_NONE)
	{
		// memRegisterA64 + memImmS32
		uint8 modRM = (dataRegister & 7) * 8 + (memRegisterA64 & 7);
@@ -352,7 +321,7 @@ void x64Gen_mov_mem32Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis
void x64Gen_mov_mem64Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 memImmU32, uint32 dataImmU32)
{
	// MOV QWORD [<memReg>+<memImmU32>], dataImmU32
-	if( memRegister == REG_R14 )
+	if( memRegister == X86_REG_R14 )
	{
		sint32 memImmS32 = (sint32)memImmU32;
		if( memImmS32 == 0 )
@@ -384,7 +353,7 @@ void x64Gen_mov_mem64Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis
void x64Gen_mov_mem8Reg64_imm8(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 memImmU32, uint8 dataImmU8)
{
	// MOV BYTE [<memReg64>+<memImmU32>], dataImmU8
-	if( memRegister == REG_RSP )
+	if( memRegister == X86_REG_RSP )
	{
		sint32 memImmS32 = (sint32)memImmU32;
		if( memImmS32 >= -128 && memImmS32 <= 127 )
@@ -625,7 +594,7 @@ void _x64_op_reg64Low_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegist
	if (memRegister64 >= 8)
		x64Gen_writeU8(x64GenContext, 0x41);
	x64Gen_writeU8(x64GenContext, opByte);
-	_x64Gen_writeMODRMDeprecated(x64GenContext, dstRegister, memRegister64, REG_NONE, memImmS32);
+	_x64Gen_writeMODRMDeprecated(x64GenContext, dstRegister, memRegister64, X86_REG_NONE, memImmS32);
}

void x64Gen_or_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32)
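// REX prefix reference for the raw 0x40..0x4F bytes written throughout this file:
// the byte encodes 0100WRXB, where W=1 selects 64-bit operand size and R/X/B extend
// the ModRM.reg, SIB.index and ModRM.rm fields to reach R8-R15. Common constants:
// 0x41 = REX.B, 0x44 = REX.R, 0x45 = REX.R|REX.B, 0x48 = REX.W.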
@@ -643,40 +612,6 @@ void x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext_t* x64GenContext, sint32 dstRe
	_x64_op_reg64Low_mem8Reg64(x64GenContext, dstRegister, memRegister64, memImmS32, 0x88);
}

-void x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister)
-{
-	// LOCK CMPXCHG DWORD [<reg64> + <reg64> + <imm64>], <srcReg64> (low dword)
-	x64Gen_writeU8(x64GenContext, 0xF0); // LOCK prefix
-
-	if( srcRegister >= 8 || memRegisterA64 >= 8 || memRegisterB64 >= 8 )
-		x64Gen_writeU8(x64GenContext, 0x40+((srcRegister>=8)?4:0)+((memRegisterA64>=8)?1:0)+((memRegisterB64>=8)?2:0));
-
-	x64Gen_writeU8(x64GenContext, 0x0F);
-	x64Gen_writeU8(x64GenContext, 0xB1);
-
-	_x64Gen_writeMODRMDeprecated(x64GenContext, srcRegister, memRegisterA64, memRegisterB64, memImmS32);
-}
-
-void x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegister64, sint32 memImmS32, sint32 srcRegister)
-{
-	// LOCK CMPXCHG DWORD [<reg64> + <imm64>], <srcReg64> (low dword)
-	x64Gen_writeU8(x64GenContext, 0xF0); // LOCK prefix
-
-	if( srcRegister >= 8 || memRegister64 >= 8 )
-		x64Gen_writeU8(x64GenContext, 0x40+((srcRegister>=8)?4:0)+((memRegister64>=8)?1:0));
-
-	x64Gen_writeU8(x64GenContext, 0x0F);
-	x64Gen_writeU8(x64GenContext, 0xB1);
-
-	if( memImmS32 == 0 )
-	{
-		x64Gen_writeU8(x64GenContext, 0x45+(srcRegister&7)*8);
-		x64Gen_writeU8(x64GenContext, 0x00);
-	}
-	else
-		assert_dbg();
-}
-
void x64Gen_add_reg64_reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister)
{
	// ADD <destReg>, <srcReg>
@@ -732,7 +667,7 @@ void x64Gen_add_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis
	}
	else
	{
-		if( srcRegister == REG_RAX )
+		if( srcRegister == X86_REG_RAX )
		{
			// special EAX short form
			x64Gen_writeU8(x64GenContext, 0x05);
@@ -772,7 +707,7 @@ void x64Gen_sub_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis
	}
	else
	{
-		if( srcRegister == REG_RAX )
+		if( srcRegister == X86_REG_RAX )
		{
			// special EAX short form
			x64Gen_writeU8(x64GenContext, 0x2D);
@@ -811,7 +746,7 @@ void x64Gen_sub_mem32reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis
{
	// SUB <mem32_memReg64>, <imm32>
	sint32 immS32 = (sint32)immU32;
-	if( memRegister == REG_RSP )
+	if( memRegister == X86_REG_RSP )
	{
		if( memImmS32 >= 128 )
		{
@@ -843,64 +778,11 @@ void x64Gen_sub_mem32reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis
	}
}

-void x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister)
-{
-	// SBB <destReg64_low32>, <srcReg64_low32>
-	if( destRegister >= 8 && srcRegister >= 8 )
-		x64Gen_writeU8(x64GenContext, 0x45);
-	else if( srcRegister >= 8 )
-		x64Gen_writeU8(x64GenContext, 0x44);
-	else if( destRegister >= 8 )
-		x64Gen_writeU8(x64GenContext, 0x41);
-	x64Gen_writeU8(x64GenContext, 0x19);
-	x64Gen_writeU8(x64GenContext, 0xC0+(srcRegister&7)*8+(destRegister&7));
-}
-
-void x64Gen_adc_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister)
-{
-	// ADC <destReg64_low32>, <srcReg64_low32>
-	if( destRegister >= 8 && srcRegister >= 8 )
-		x64Gen_writeU8(x64GenContext, 0x45);
-	else if( srcRegister >= 8 )
-		x64Gen_writeU8(x64GenContext, 0x44);
-	else if( destRegister >= 8 )
-		x64Gen_writeU8(x64GenContext, 0x41);
-	x64Gen_writeU8(x64GenContext, 0x11);
-	x64Gen_writeU8(x64GenContext, 0xC0+(srcRegister&7)*8+(destRegister&7));
-}
-
-void x64Gen_adc_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32)
-{
-	sint32 immS32 = (sint32)immU32;
-	if( srcRegister >= 8 )
-		x64Gen_writeU8(x64GenContext, 0x41);
-	if( immS32 >= -128 && immS32 <= 127 )
-	{
-		x64Gen_writeU8(x64GenContext, 0x83);
-		x64Gen_writeU8(x64GenContext, 0xD0+(srcRegister&7));
-		x64Gen_writeU8(x64GenContext, (uint8)immS32);
-	}
-	else
-	{
-		if( srcRegister == REG_RAX )
-		{
-			// special EAX short form
-			x64Gen_writeU8(x64GenContext, 0x15);
-		}
-		else
-		{
-			x64Gen_writeU8(x64GenContext, 0x81);
-			x64Gen_writeU8(x64GenContext, 0xD0+(srcRegister&7));
-		}
-		x64Gen_writeU32(x64GenContext, immU32);
-	}
-}
-
void x64Gen_dec_mem32(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint32 memoryImmU32)
{
	// DEC dword [<reg64>+imm]
	sint32 memoryImmS32 = (sint32)memoryImmU32;
-	if (memoryRegister != REG_RSP)
+	if (memoryRegister != X86_REG_RSP)
		assert_dbg(); // not supported yet
	if (memoryImmS32 >= -128 && memoryImmS32 <= 127)
	{
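// Encoding note on the removed ADC/SBB helpers (reference): 0x11/0x19 are the
// reg-reg ADC/SBB opcodes with ModRM 0xC0+src*8+dst; group-1 opcode 0x83 takes a
// sign-extended imm8 (short form), 0x81 a full imm32, and 0x15 is the ADC
// EAX,imm32 short form.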
@@ -981,7 +863,7 @@ void x64Gen_and_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis
	}
	else
	{
-		if( srcRegister == REG_RAX )
+		if( srcRegister == X86_REG_RAX )
		{
			// special EAX short form
			x64Gen_writeU8(x64GenContext, 0x25);
@@ -1026,7 +908,7 @@ void x64Gen_test_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegi
	sint32 immS32 = (sint32)immU32;
	if( srcRegister >= 8 )
		x64Gen_writeU8(x64GenContext, 0x41);
-	if( srcRegister == REG_RAX )
+	if( srcRegister == X86_REG_RAX )
	{
		// special EAX short form
		x64Gen_writeU8(x64GenContext, 0xA9);
@@ -1052,7 +934,7 @@ void x64Gen_cmp_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis
	}
	else
	{
-		if( srcRegister == REG_RAX )
+		if( srcRegister == X86_REG_RAX )
		{
			// special RAX short form
			x64Gen_writeU8(x64GenContext, 0x3D);
@@ -1082,7 +964,7 @@ void x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 des
void x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 memRegister, sint32 memImmS32)
{
	// CMP <destReg64_lowDWORD>, DWORD [<memRegister>+<immS32>]
-	if( memRegister == REG_RSP )
+	if( memRegister == X86_REG_RSP )
	{
		if( memImmS32 >= -128 && memImmS32 <= 127 )
			assert_dbg(); // todo -> Shorter instruction form
@@ -1112,7 +994,7 @@ void x64Gen_or_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegist
	}
	else
	{
-		if( srcRegister == REG_RAX )
+		if( srcRegister == X86_REG_RAX )
		{
			// special EAX short form
			x64Gen_writeU8(x64GenContext, 0x0D);
@@ -1172,7 +1054,7 @@ void x64Gen_xor_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis
	}
	else
	{
-		if( srcRegister == REG_RAX )
+		if( srcRegister == X86_REG_RAX )
		{
			// special EAX short form
			x64Gen_writeU8(x64GenContext, 0x35);
@@ -1326,16 +1208,6 @@ void x64Gen_cdq(x64GenContext_t* x64GenContext)
	x64Gen_writeU8(x64GenContext, 0x99);
}

-void x64Gen_bswap_reg64(x64GenContext_t* x64GenContext, sint32 destRegister)
-{
-	if( destRegister >= 8 )
-		x64Gen_writeU8(x64GenContext, 0x41|8);
-	else
-		x64Gen_writeU8(x64GenContext, 0x40|8);
-	x64Gen_writeU8(x64GenContext, 0x0F);
-	x64Gen_writeU8(x64GenContext, 0xC8+(destRegister&7));
-}
-
void x64Gen_bswap_reg64Lower32bit(x64GenContext_t* x64GenContext, sint32 destRegister)
{
	if( destRegister >= 8 )
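// BSWAP encodes as 0F C8+r (reference); the removed 64-bit variant prepended
// REX.W (0x40|8 = 0x48), or REX.W|REX.B (0x41|8 = 0x49) for R8-R15, to operate
// on the full 64-bit register.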
@@ -1344,16 +1216,6 @@ void x64Gen_bswap_reg64Lower32bit(x64GenContext_t* x64GenContext, sint32 destReg
	x64Gen_writeU8(x64GenContext, 0xC8+(destRegister&7));
}

-void x64Gen_bswap_reg64Lower16bit(x64GenContext_t* x64GenContext, sint32 destRegister)
-{
-	assert_dbg(); // do not use this instruction, its result is always undefined. Instead use ROL <reg16>, 8
-	//x64Gen_writeU8(x64GenContext, 0x66);
-	//if( destRegister >= 8 )
-	//	x64Gen_writeU8(x64GenContext, 0x41);
-	//x64Gen_writeU8(x64GenContext, 0x0F);
-	//x64Gen_writeU8(x64GenContext, 0xC8+(destRegister&7));
-}
-
void x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister)
{
	// SSE4
@@ -1388,7 +1250,7 @@ void x64Gen_setcc_mem8(x64GenContext_t* x64GenContext, sint32 conditionType, sin
{
	// SETcc [<reg64>+imm]
	sint32 memoryImmS32 = (sint32)memoryImmU32;
-	if( memoryRegister != REG_RSP )
+	if( memoryRegister != X86_REG_RSP )
		assert_dbg(); // not supported
	if( memoryRegister >= 8 )
		assert_dbg(); // not supported
@@ -1627,7 +1489,7 @@ void x64Gen_bt_mem8(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint3
{
	// BT [<reg64>+imm], bitIndex (bit test)
	sint32 memoryImmS32 = (sint32)memoryImmU32;
-	if( memoryRegister != REG_RSP )
+	if( memoryRegister != X86_REG_RSP )
		assert_dbg(); // not supported yet
	if( memoryImmS32 >= -128 && memoryImmS32 <= 127 )
	{
@@ -1662,7 +1524,7 @@ void x64Gen_jmp_imm32(x64GenContext_t* x64GenContext, uint32 destImm32)

void x64Gen_jmp_memReg64(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 immU32)
{
-	if( memRegister == REG_NONE )
+	if( memRegister == X86_REG_NONE )
	{
		assert_dbg();
	}
@@ -1,6 +1,4 @@
#include "PPCRecompiler.h"
-#include "PPCRecompilerIml.h"
-#include "PPCRecompilerX64.h"
+#include "BackendX64.h"

void x64Gen_genSSEVEXPrefix2(x64GenContext_t* x64GenContext, sint32 xmmRegister1, sint32 xmmRegister2, bool use64BitMode)
{
@@ -44,7 +42,7 @@ void x64Gen_movupd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRe
	// SSE2
	// move two doubles from memory into xmm register
	// MOVUPD <xmm>, [<reg>+<imm>]
-	if( memRegister == REG_ESP )
+	if( memRegister == X86_REG_ESP )
	{
		// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
		// 66 0F 10 84 E4 23 01 00 00
@@ -56,7 +54,7 @@ void x64Gen_movupd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRe
		x64Gen_writeU8(x64GenContext, 0xE4);
		x64Gen_writeU32(x64GenContext, memImmU32);
	}
-	else if( memRegister == REG_NONE )
+	else if( memRegister == X86_REG_NONE )
	{
		assert_dbg();
		//x64Gen_writeU8(x64GenContext, 0x66);
@@ -76,7 +74,7 @@ void x64Gen_movupd_memReg128_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRe
	// SSE2
	// move two doubles from xmm register into memory
	// MOVUPD [<reg>+<imm>], <xmm>
-	if( memRegister == REG_ESP )
+	if( memRegister == X86_REG_ESP )
	{
		// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
		x64Gen_writeU8(x64GenContext, 0x66);
@@ -87,7 +85,7 @@ void x64Gen_movupd_memReg128_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRe
		x64Gen_writeU8(x64GenContext, 0xE4);
		x64Gen_writeU32(x64GenContext, memImmU32);
	}
-	else if( memRegister == REG_NONE )
+	else if( memRegister == X86_REG_NONE )
	{
		assert_dbg();
		//x64Gen_writeU8(x64GenContext, 0x66);
@@ -106,7 +104,7 @@ void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRe
{
	// SSE3
	// move one double from memory into lower and upper half of a xmm register
-	if( memRegister == REG_RSP )
+	if( memRegister == X86_REG_RSP )
	{
		// MOVDDUP <xmm>, [<reg>+<imm>]
		// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
@@ -119,7 +117,7 @@ void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRe
		x64Gen_writeU8(x64GenContext, 0xE4);
		x64Gen_writeU32(x64GenContext, memImmU32);
	}
-	else if( memRegister == REG_R15 )
+	else if( memRegister == X86_REG_R15 )
	{
		// MOVDDUP <xmm>, [<reg>+<imm>]
		// todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range
@@ -131,7 +129,7 @@ void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRe
		x64Gen_writeU8(x64GenContext, 0x87+(xmmRegister&7)*8);
		x64Gen_writeU32(x64GenContext, memImmU32);
	}
-	else if( memRegister == REG_NONE )
+	else if( memRegister == X86_REG_NONE )
	{
		// MOVDDUP <xmm>, [<imm>]
		// 36 F2 0F 12 05 - 00 00 00 00
@@ -185,7 +183,7 @@ void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi
{
	// SSE2
	// move lower 64bits (double) of xmm register to memory location
-	if( memRegister == REG_NONE )
+	if( memRegister == X86_REG_NONE )
	{
		// MOVSD [<imm>], <xmm>
		// F2 0F 11 05 - 45 23 01 00
@@ -197,7 +195,7 @@ void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi
		//x64Gen_writeU8(x64GenContext, 0x05+xmmRegister*8);
		//x64Gen_writeU32(x64GenContext, memImmU32);
	}
-	else if( memRegister == REG_RSP )
+	else if( memRegister == X86_REG_RSP )
	{
		// MOVSD [RSP+<imm>], <xmm>
		// F2 0F 11 84 24 - 33 22 11 00
@@ -215,11 +213,42 @@ void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi
	}
}

+void x64Gen_movsd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
+{
+	// SSE2
+	if( memRegister == X86_REG_RSP )
+	{
+		// MOVSD <xmm>, [RSP+<imm>]
+		x64Gen_writeU8(x64GenContext, 0xF2);
+		x64Gen_genSSEVEXPrefix2(x64GenContext, 0, xmmRegister, false);
+		x64Gen_writeU8(x64GenContext, 0x0F);
+		x64Gen_writeU8(x64GenContext, 0x10);
+		x64Gen_writeU8(x64GenContext, 0x84+(xmmRegister&7)*8);
+		x64Gen_writeU8(x64GenContext, 0x24);
+		x64Gen_writeU32(x64GenContext, memImmU32);
+	}
+	else if( memRegister == 15 )
+	{
+		// MOVSD <xmm>, [R15+<imm>]
+		x64Gen_writeU8(x64GenContext, 0x36);
+		x64Gen_writeU8(x64GenContext, 0xF2);
+		x64Gen_genSSEVEXPrefix2(x64GenContext, memRegister, xmmRegister, false);
+		x64Gen_writeU8(x64GenContext, 0x0F);
+		x64Gen_writeU8(x64GenContext, 0x10);
+		x64Gen_writeU8(x64GenContext, 0x87+(xmmRegister&7)*8);
+		x64Gen_writeU32(x64GenContext, memImmU32);
+	}
+	else
+	{
+		assert_dbg();
+	}
+}
+
void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE3
	// move one double from memory into lower half of a xmm register, leave upper half unchanged(?)
-	if( memRegister == REG_NONE )
+	if( memRegister == X86_REG_NONE )
	{
		// MOVLPD <xmm>, [<imm>]
		//x64Gen_writeU8(x64GenContext, 0x66);
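// MOVSD load encoding used in the new function above (reference): F2 0F 10 /r.
// ModRM 0x84 plus SIB byte 0x24 addresses [RSP+disp32] (RSP always requires a SIB
// byte), while ModRM 0x87 addresses [R15+disp32] in combination with the REX.B bit
// emitted by the prefix helper.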
@@ -229,7 +258,7 @@ void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmReg
		//x64Gen_writeU32(x64GenContext, memImmU32);
		assert_dbg();
	}
-	else if( memRegister == REG_RSP )
+	else if( memRegister == X86_REG_RSP )
	{
		// MOVLPD <xmm>, [<reg64>+<imm>]
		// 66 0F 12 84 24 - 33 22 11 00
@@ -348,11 +377,11 @@ void x64Gen_mulpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegist
void x64Gen_mulpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE2
-	if (memRegister == REG_NONE)
+	if (memRegister == X86_REG_NONE)
	{
		assert_dbg();
	}
-	else if (memRegister == REG_R14)
+	else if (memRegister == X86_REG_R14)
	{
		x64Gen_writeU8(x64GenContext, 0x66);
		x64Gen_writeU8(x64GenContext, (xmmRegister < 8) ? 0x41 : 0x45);
@@ -404,7 +433,7 @@ void x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmR
{
	// SSE2
	// compare bottom double with double from memory location
-	if( memoryReg == REG_R15 )
+	if( memoryReg == X86_REG_R15 )
	{
		x64Gen_writeU8(x64GenContext, 0x66);
		x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true);
@@ -432,7 +461,7 @@ void x64Gen_comiss_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmR
{
	// SSE2
	// compare bottom float with float from memory location
-	if (memoryReg == REG_R15)
+	if (memoryReg == X86_REG_R15)
	{
		x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true);
		x64Gen_writeU8(x64GenContext, 0x0F);
@@ -448,7 +477,7 @@ void x64Gen_orps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRe
{
	// SSE2
	// or xmm register with 128 bit value from memory
-	if( memReg == REG_R15 )
+	if( memReg == X86_REG_R15 )
	{
		x64Gen_genSSEVEXPrefix2(x64GenContext, memReg, xmmRegisterDest, false);
		x64Gen_writeU8(x64GenContext, 0x0F);
@@ -464,7 +493,7 @@ void x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmR
{
	// SSE2
	// xor xmm register with 128 bit value from memory
-	if( memReg == REG_R15 )
+	if( memReg == X86_REG_R15 )
	{
		x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); // todo: should be x64Gen_genSSEVEXPrefix2() with memReg?
		x64Gen_writeU8(x64GenContext, 0x0F);
@@ -479,11 +508,11 @@ void x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmR
void x64Gen_andpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE2
-	if (memRegister == REG_NONE)
+	if (memRegister == X86_REG_NONE)
	{
		assert_dbg();
	}
-	else if (memRegister == REG_R14)
+	else if (memRegister == X86_REG_R14)
	{
		x64Gen_writeU8(x64GenContext, 0x66);
		x64Gen_writeU8(x64GenContext, (xmmRegister < 8) ? 0x41 : 0x45);
@@ -502,7 +531,7 @@ void x64Gen_andps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmR
{
	// SSE2
	// and xmm register with 128 bit value from memory
-	if( memReg == REG_R15 )
+	if( memReg == X86_REG_R15 )
	{
		x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); // todo: should be x64Gen_genSSEVEXPrefix2() with memReg?
		x64Gen_writeU8(x64GenContext, 0x0F);
@@ -528,7 +557,7 @@ void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xm
{
	// SSE2
	// doubleword integer compare
-	if( memReg == REG_R15 )
+	if( memReg == X86_REG_R15 )
	{
		x64Gen_writeU8(x64GenContext, 0x66);
		x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true);
@@ -563,6 +592,16 @@ void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 regis
	x64Gen_writeU8(x64GenContext, 0xC0+(registerDest&7)*8+(xmmRegisterSrc&7));
}

+void x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc)
+{
+	// SSE2
+	x64Gen_writeU8(x64GenContext, 0xF2);
+	x64Gen_genSSEVEXPrefix2(x64GenContext, registerSrc, xmmRegisterDest, false);
+	x64Gen_writeU8(x64GenContext, 0x0F);
+	x64Gen_writeU8(x64GenContext, 0x2A);
+	x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(registerSrc&7));
+}
+
void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
	// SSE2
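// CVTSI2SD (F2 0F 2A /r) added above converts a signed 32-bit integer to double,
// i.e. (double)(sint32)x in C terms; its counterpart CVTTSD2SI converts the other
// way with truncation toward zero (reference, Intel SDM).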
@@ -610,7 +649,7 @@ void x64Gen_cvtpi2pd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xm
{
	// SSE2
	// converts two signed 32bit integers to two doubles
-	if( memReg == REG_RSP )
+	if( memReg == X86_REG_RSP )
	{
		x64Gen_writeU8(x64GenContext, 0x66);
		x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, false);
@@ -684,7 +723,7 @@ void x64Gen_rcpss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegist
void x64Gen_mulss_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE2
-	if( memRegister == REG_NONE )
+	if( memRegister == X86_REG_NONE )
	{
		assert_dbg();
	}
@@ -203,7 +203,6 @@ template<class opcodeBytes, typename TA, typename TB>
void _x64Gen_writeMODRM_internal(x64GenContext_t* x64GenContext, TA opA, TB opB)
{
	static_assert(TA::getType() == MODRM_OPR_TYPE::REG);
-	x64Gen_checkBuffer(x64GenContext);
	// REX prefix
	// 0100 WRXB
	if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::REG)
src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h (new file, 4335 lines)
File diff suppressed because it is too large
src/Cafe/HW/Espresso/Recompiler/IML/IML.h (new file, 16 lines)
@@ -0,0 +1,16 @@
#pragma once

#include "IMLInstruction.h"
#include "IMLSegment.h"

// optimizer passes
void IMLOptimizer_OptimizeDirectFloatCopies(struct ppcImlGenContext_t* ppcImlGenContext);
void IMLOptimizer_OptimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_optimizePSQLoadAndStore(struct ppcImlGenContext_t* ppcImlGenContext);

void IMLOptimizer_StandardOptimizationPass(ppcImlGenContext_t& ppcImlGenContext);

// debug
void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& disassemblyLineOut);
void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo = false);
void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo = false);
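// Minimal usage sketch (illustration only; assumes a ppcImlGenContext_t named ctx
// that was already populated by the PPC-to-IML translation step):
//   IMLOptimizer_StandardOptimizationPass(ctx); // run the standard optimizer pipeline
//   IMLDebug_Dump(&ctx, true);                  // dump all segments incl. liveness info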
src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp (new file, 5 lines)
@@ -0,0 +1,5 @@
#include "IML.h"
//#include "PPCRecompilerIml.h"
#include "util/helpers/fixedSizeList.h"

#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp (new file, 561 lines)
@@ -0,0 +1,561 @@
#include "IML.h"
#include "IMLInstruction.h"
#include "IMLSegment.h"
#include "IMLRegisterAllocatorRanges.h"
#include "util/helpers/StringBuf.h"

#include "../PPCRecompiler.h"

const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml)
{
	static char _tempOpcodename[32];
	uint32 op = iml->operation;
	if (op == PPCREC_IML_OP_ASSIGN)
		return "MOV";
	else if (op == PPCREC_IML_OP_ADD)
		return "ADD";
	else if (op == PPCREC_IML_OP_ADD_WITH_CARRY)
		return "ADC";
	else if (op == PPCREC_IML_OP_SUB)
		return "SUB";
	else if (op == PPCREC_IML_OP_OR)
		return "OR";
	else if (op == PPCREC_IML_OP_AND)
		return "AND";
	else if (op == PPCREC_IML_OP_XOR)
		return "XOR";
	else if (op == PPCREC_IML_OP_LEFT_SHIFT)
		return "LSH";
	else if (op == PPCREC_IML_OP_RIGHT_SHIFT_U)
		return "RSH";
	else if (op == PPCREC_IML_OP_RIGHT_SHIFT_S)
		return "ARSH";
	else if (op == PPCREC_IML_OP_LEFT_ROTATE)
		return "LROT";
	else if (op == PPCREC_IML_OP_MULTIPLY_SIGNED)
		return "MULS";
	else if (op == PPCREC_IML_OP_DIVIDE_SIGNED)
		return "DIVS";
	else if (op == PPCREC_IML_OP_FPR_ASSIGN)
		return "FMOV";
	else if (op == PPCREC_IML_OP_FPR_ADD)
		return "FADD";
	else if (op == PPCREC_IML_OP_FPR_SUB)
		return "FSUB";
	else if (op == PPCREC_IML_OP_FPR_MULTIPLY)
		return "FMUL";
	else if (op == PPCREC_IML_OP_FPR_DIVIDE)
		return "FDIV";
	else if (op == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64)
		return "F32TOF64";
	else if (op == PPCREC_IML_OP_FPR_ABS)
		return "FABS";
	else if (op == PPCREC_IML_OP_FPR_NEGATE)
		return "FNEG";
	else if (op == PPCREC_IML_OP_FPR_NEGATIVE_ABS)
		return "FNABS";
	else if (op == PPCREC_IML_OP_FPR_FLOAT_TO_INT)
		return "F2I";
	else if (op == PPCREC_IML_OP_FPR_INT_TO_FLOAT)
		return "I2F";
	else if (op == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
		return "BITMOVE";

	sprintf(_tempOpcodename, "OP0%02x_T%d", iml->operation, iml->type);
	return _tempOpcodename;
}

std::string IMLDebug_GetRegName(IMLReg r)
{
	std::string regName;
	uint32 regId = r.GetRegID();
	switch (r.GetRegFormat())
	{
	case IMLRegFormat::F32:
		regName.append("f");
		break;
	case IMLRegFormat::F64:
		regName.append("fd");
		break;
	case IMLRegFormat::I32:
		regName.append("i");
		break;
	case IMLRegFormat::I64:
		regName.append("r");
		break;
	default:
		DEBUG_BREAK;
	}
	regName.append(fmt::format("{}", regId));
	return regName;
}
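// Example outputs (illustrative): an F64 register with id 5 formats as "fd5",
// an I32 register with id 3 as "i3".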
void IMLDebug_AppendRegisterParam(StringBuf& strOutput, IMLReg virtualRegister, bool isLast = false)
{
	strOutput.add(IMLDebug_GetRegName(virtualRegister));
	if (!isLast)
		strOutput.add(", ");
}

void IMLDebug_AppendS32Param(StringBuf& strOutput, sint32 val, bool isLast = false)
{
	if (val < 0)
	{
		strOutput.add("-");
		val = -val;
	}
	strOutput.addFmt("0x{:08x}", val);
	if (!isLast)
		strOutput.add(", ");
}

void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* imlSegment, sint32 offset)
{
	// pad to 70 characters
	sint32 index = currentLineText.getLen();
	while (index < 70)
	{
		currentLineText.add(" ");
		index++;
	}
	raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
	while (subrangeItr)
	{
		if (subrangeItr->interval.start.GetInstructionIndexEx() == offset)
		{
			if(subrangeItr->interval.start.IsInstructionIndex() && !subrangeItr->interval.start.IsOnInputEdge())
				currentLineText.add(".");
			else
				currentLineText.add("|");

			currentLineText.addFmt("{:<4}", subrangeItr->GetVirtualRegister());
		}
		else if (subrangeItr->interval.end.GetInstructionIndexEx() == offset)
		{
			if(subrangeItr->interval.end.IsInstructionIndex() && !subrangeItr->interval.end.IsOnOutputEdge())
				currentLineText.add("* ");
			else
				currentLineText.add("| ");
		}
		else if (subrangeItr->interval.ContainsInstructionIndexEx(offset))
		{
			currentLineText.add("| ");
		}
		else
		{
			currentLineText.add(" ");
		}
		index += 5;
		// next
		subrangeItr = subrangeItr->link_allSegmentRanges.next;
	}
}

std::string IMLDebug_GetSegmentName(ppcImlGenContext_t* ctx, IMLSegment* seg)
{
	if (!ctx)
	{
		return "<NoNameWithoutCtx>";
	}
	// find segment index
	for (size_t i = 0; i < ctx->segmentList2.size(); i++)
	{
		if (ctx->segmentList2[i] == seg)
		{
			return fmt::format("Seg{:04x}", i);
		}
	}
	return "<SegmentNotInCtx>";
}

std::string IMLDebug_GetConditionName(IMLCondition cond)
{
	switch (cond)
	{
	case IMLCondition::EQ:
		return "EQ";
	case IMLCondition::NEQ:
		return "NEQ";
	case IMLCondition::UNSIGNED_GT:
		return "UGT";
	case IMLCondition::UNSIGNED_LT:
		return "ULT";
	case IMLCondition::SIGNED_GT:
		return "SGT";
	case IMLCondition::SIGNED_LT:
		return "SLT";
	default:
		cemu_assert_unimplemented();
	}
	return "ukn";
}

void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& disassemblyLineOut)
{
	const sint32 lineOffsetParameters = 10; //18;

	StringBuf strOutput(1024);
	strOutput.reset();
	if (inst.type == PPCREC_IML_TYPE_R_NAME || inst.type == PPCREC_IML_TYPE_NAME_R)
	{
		if (inst.type == PPCREC_IML_TYPE_R_NAME)
			strOutput.add("R_NAME");
		else
			strOutput.add("NAME_R");
		while ((sint32)strOutput.getLen() < lineOffsetParameters)
			strOutput.add(" ");

		if(inst.type == PPCREC_IML_TYPE_R_NAME)
			IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR);

		strOutput.add("name_");
		if (inst.op_r_name.name >= PPCREC_NAME_R0 && inst.op_r_name.name < (PPCREC_NAME_R0 + 999))
		{
			strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0);
		}
		if (inst.op_r_name.name >= PPCREC_NAME_FPR_HALF && inst.op_r_name.name < (PPCREC_NAME_FPR_HALF + 32*2))
		{
			strOutput.addFmt("f{}", (inst.op_r_name.name - PPCREC_NAME_FPR_HALF) / 2);
			if ((inst.op_r_name.name-PPCREC_NAME_FPR_HALF)&1)
				strOutput.add(".ps1");
			else
				strOutput.add(".ps0");
		}
		else if (inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0 + 999))
		{
			strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0);
		}
		else if (inst.op_r_name.name >= PPCREC_NAME_CR && inst.op_r_name.name <= PPCREC_NAME_CR_LAST)
			strOutput.addFmt("cr{}", inst.op_r_name.name - PPCREC_NAME_CR);
		else if (inst.op_r_name.name == PPCREC_NAME_XER_CA)
			strOutput.add("xer.ca");
		else if (inst.op_r_name.name == PPCREC_NAME_XER_SO)
			strOutput.add("xer.so");
		else if (inst.op_r_name.name == PPCREC_NAME_XER_OV)
			strOutput.add("xer.ov");
		else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_EA)
			strOutput.add("cpuReservation.ea");
		else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_VAL)
			strOutput.add("cpuReservation.value");
		else
		{
			strOutput.addFmt("name_ukn{}", inst.op_r_name.name);
		}
		if (inst.type != PPCREC_IML_TYPE_R_NAME)
		{
			strOutput.add(", ");
			IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR, true);
		}

	}
	else if (inst.type == PPCREC_IML_TYPE_R_R)
	{
		strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
		while ((sint32)strOutput.getLen() < lineOffsetParameters)
			strOutput.add(" ");
		IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regR);
		IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regA, true);
	}
	else if (inst.type == PPCREC_IML_TYPE_R_R_R)
	{
		strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
		while ((sint32)strOutput.getLen() < lineOffsetParameters)
			strOutput.add(" ");
		IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regR);
		IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regA);
		IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regB, true);
	}
	else if (inst.type == PPCREC_IML_TYPE_R_R_R_CARRY)
	{
		strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
		while ((sint32)strOutput.getLen() < lineOffsetParameters)
			strOutput.add(" ");
		IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regR);
		IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regA);
		IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regB);
		IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regCarry, true);
	}
	else if (inst.type == PPCREC_IML_TYPE_COMPARE)
	{
		strOutput.add("CMP ");
		while ((sint32)strOutput.getLen() < lineOffsetParameters)
			strOutput.add(" ");
		IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regA);
		IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regB);
		strOutput.addFmt("{}", IMLDebug_GetConditionName(inst.op_compare.cond));
		strOutput.add(" -> ");
		IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regR, true);
	}
	else if (inst.type == PPCREC_IML_TYPE_COMPARE_S32)
	{
		strOutput.add("CMP ");
		while ((sint32)strOutput.getLen() < lineOffsetParameters)
			strOutput.add(" ");
		IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regA);
		strOutput.addFmt("{}", inst.op_compare_s32.immS32);
		strOutput.addFmt(", {}", IMLDebug_GetConditionName(inst.op_compare_s32.cond));
		strOutput.add(" -> ");
		IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regR, true);
	}
	else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
	{
		strOutput.add("CJUMP ");
		while ((sint32)strOutput.getLen() < lineOffsetParameters)
			strOutput.add(" ");
		IMLDebug_AppendRegisterParam(strOutput, inst.op_conditional_jump.registerBool, true);
		if (!inst.op_conditional_jump.mustBeTrue)
			strOutput.add("(inverted)");
	}
	else if (inst.type == PPCREC_IML_TYPE_JUMP)
	{
		strOutput.add("JUMP");
	}
	else if (inst.type == PPCREC_IML_TYPE_R_R_S32)
	{
		strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
		while ((sint32)strOutput.getLen() < lineOffsetParameters)
			strOutput.add(" ");

		IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regR);
		IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regA);
		IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32.immS32, true);
	}
	else if (inst.type == PPCREC_IML_TYPE_R_R_S32_CARRY)
	{
		strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
		while ((sint32)strOutput.getLen() < lineOffsetParameters)
			strOutput.add(" ");

		IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regR);
		IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regA);
		IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32_carry.immS32);
		IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regCarry, true);
	}
	else if (inst.type == PPCREC_IML_TYPE_R_S32)
	{
		strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
		while ((sint32)strOutput.getLen() < lineOffsetParameters)
			strOutput.add(" ");

		IMLDebug_AppendRegisterParam(strOutput, inst.op_r_immS32.regR);
		IMLDebug_AppendS32Param(strOutput, inst.op_r_immS32.immS32, true);
	}
	else if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE ||
		inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED)
	{
		if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED)
			strOutput.add("LD_");
		else
			strOutput.add("ST_");

		if (inst.op_storeLoad.flags2.signExtend)
			strOutput.add("S");
		else
			strOutput.add("U");
		strOutput.addFmt("{}", inst.op_storeLoad.copyWidth);

		while ((sint32)strOutput.getLen() < lineOffsetParameters)
			strOutput.add(" ");

		IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData);

		if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED)
			strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), IMLDebug_GetRegName(inst.op_storeLoad.registerMem2));
		else
			strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32);
	}
	else if (inst.type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
	{
		strOutput.add("ATOMIC_ST_U32");

		while ((sint32)strOutput.getLen() < lineOffsetParameters)
			strOutput.add(" ");

		IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regEA);
		IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regCompareValue);
		IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regWriteValue);
		IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regBoolOut, true);
	}
	else if (inst.type == PPCREC_IML_TYPE_NO_OP)
	{
		strOutput.add("NOP");
	}
	else if (inst.type == PPCREC_IML_TYPE_MACRO)
	{
		if (inst.operation == PPCREC_IML_MACRO_B_TO_REG)
		{
			strOutput.addFmt("MACRO B_TO_REG {}", IMLDebug_GetRegName(inst.op_macro.paramReg));
		}
		else if (inst.operation == PPCREC_IML_MACRO_BL)
		{
			strOutput.addFmt("MACRO BL 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16);
		}
		else if (inst.operation == PPCREC_IML_MACRO_B_FAR)
		{
			strOutput.addFmt("MACRO B_FAR 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16);
		}
		else if (inst.operation == PPCREC_IML_MACRO_LEAVE)
		{
			strOutput.addFmt("MACRO LEAVE ppc: 0x{:08x}", inst.op_macro.param);
		}
		else if (inst.operation == PPCREC_IML_MACRO_HLE)
		{
			strOutput.addFmt("MACRO HLE ppcAddr: 0x{:08x} funcId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2);
		}
		else if (inst.operation == PPCREC_IML_MACRO_COUNT_CYCLES)
		{
			strOutput.addFmt("MACRO COUNT_CYCLES cycles: {}", inst.op_macro.param);
		}
		else
		{
			strOutput.addFmt("MACRO ukn operation {}", inst.operation);
		}
	}
	else if (inst.type == PPCREC_IML_TYPE_FPR_LOAD)
	{
		strOutput.addFmt("{} = ", IMLDebug_GetRegName(inst.op_storeLoad.registerData));
		if (inst.op_storeLoad.flags2.signExtend)
			strOutput.add("S");
		else
			strOutput.add("U");
		strOutput.addFmt("{} [{}+{}] mode {}", inst.op_storeLoad.copyWidth / 8, IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32, inst.op_storeLoad.mode);
		if (inst.op_storeLoad.flags2.notExpanded)
		{
			strOutput.addFmt(" <No expand>");
		}
	}
	else if (inst.type == PPCREC_IML_TYPE_FPR_STORE)
	{
		if (inst.op_storeLoad.flags2.signExtend)
			strOutput.add("S");
		else
			strOutput.add("U");
		strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.immS32);
		strOutput.addFmt(" = {} mode {}", IMLDebug_GetRegName(inst.op_storeLoad.registerData), inst.op_storeLoad.mode);
	}
	else if (inst.type == PPCREC_IML_TYPE_FPR_R)
	{
		strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
		strOutput.addFmt("{}", IMLDebug_GetRegName(inst.op_fpr_r.regR));
	}
	else if (inst.type == PPCREC_IML_TYPE_FPR_R_R)
	{
		strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
		strOutput.addFmt("{}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r.regA));
	}
	else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R)
	{
		strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
		strOutput.addFmt("{}, {}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regB), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regC));
	}
	else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R)
	{
		strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
		strOutput.addFmt("{}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regB));
	}
	else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
	{
		strOutput.addFmt("CYCLE_CHECK");
	}
	else if (inst.type == PPCREC_IML_TYPE_X86_EFLAGS_JCC)
	{
		strOutput.addFmt("X86_JCC {}", IMLDebug_GetConditionName(inst.op_x86_eflags_jcc.cond));
	}
	else
	{
		strOutput.addFmt("Unknown iml type {}", inst.type);
	}
	disassemblyLineOut.assign(strOutput.c_str());
}

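// Example of the resulting disassembly text (illustrative register ids):
//   ADD       r5, r3, r4
//   CMP       i3, 32, ULT -> i6
// i.e. the opcode name padded to column 10, followed by comma-separated
// register/immediate operands.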
void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo)
{
	StringBuf strOutput(4096);

	strOutput.addFmt("SEGMENT {} | PPC=0x{:08x} Loop-depth {}", IMLDebug_GetSegmentName(ctx, imlSegment), imlSegment->ppcAddress, imlSegment->loopDepth);
	if (imlSegment->isEnterable)
	{
		strOutput.addFmt(" ENTERABLE (0x{:08x})", imlSegment->enterPPCAddress);
	}
	if (imlSegment->deadCodeEliminationHintSeg)
	{
		strOutput.addFmt(" InheritOverwrite: {}", IMLDebug_GetSegmentName(ctx, imlSegment->deadCodeEliminationHintSeg));
	}
	cemuLog_log(LogType::Force, "{}", strOutput.c_str());

	if (printLivenessRangeInfo)
	{
		strOutput.reset();
		IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START);
		cemuLog_log(LogType::Force, "{}", strOutput.c_str());
	}
	//debug_printf("\n");
	strOutput.reset();

	std::string disassemblyLine;
	for (sint32 i = 0; i < imlSegment->imlList.size(); i++)
	{
		const IMLInstruction& inst = imlSegment->imlList[i];
		// don't log NOP instructions
		if (inst.type == PPCREC_IML_TYPE_NO_OP)
			continue;
		strOutput.reset();
		strOutput.addFmt("{:02x} ", i);
		//cemuLog_log(LogType::Force, "{:02x} ", i);
		disassemblyLine.clear();
		IMLDebug_DisassembleInstruction(inst, disassemblyLine);
		strOutput.add(disassemblyLine);
		if (printLivenessRangeInfo)
		{
			IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, i);
		}
		cemuLog_log(LogType::Force, "{}", strOutput.c_str());
	}
	// all ranges
	if (printLivenessRangeInfo)
	{
		strOutput.reset();
		strOutput.add("Ranges-VirtReg ");
		raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
		while (subrangeItr)
		{
			strOutput.addFmt("v{:<4}", (uint32)subrangeItr->GetVirtualRegister());
			subrangeItr = subrangeItr->link_allSegmentRanges.next;
		}
		cemuLog_log(LogType::Force, "{}", strOutput.c_str());
		strOutput.reset();
		strOutput.add("Ranges-PhysReg ");
		subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
		while (subrangeItr)
		{
			strOutput.addFmt("p{:<4}", subrangeItr->GetPhysicalRegister());
			subrangeItr = subrangeItr->link_allSegmentRanges.next;
		}
		cemuLog_log(LogType::Force, "{}", strOutput.c_str());
	}
	// branch info
	strOutput.reset();
	strOutput.add("Links from: ");
	for (sint32 i = 0; i < imlSegment->list_prevSegments.size(); i++)
	{
		if (i)
			strOutput.add(", ");
		strOutput.addFmt("{}", IMLDebug_GetSegmentName(ctx, imlSegment->list_prevSegments[i]).c_str());
	}
	cemuLog_log(LogType::Force, "{}", strOutput.c_str());
	if (imlSegment->nextSegmentBranchNotTaken)
		cemuLog_log(LogType::Force, "BranchNotTaken: {}", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchNotTaken).c_str());
	if (imlSegment->nextSegmentBranchTaken)
		cemuLog_log(LogType::Force, "BranchTaken: {}", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchTaken).c_str());
	if (imlSegment->nextSegmentIsUncertain)
		cemuLog_log(LogType::Force, "Dynamic target");
}

void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo)
|
||||
{
|
||||
for (size_t i = 0; i < ppcImlGenContext->segmentList2.size(); i++)
|
||||
{
|
||||
IMLDebug_DumpSegment(ppcImlGenContext, ppcImlGenContext->segmentList2[i], printLivenessRangeInfo);
|
||||
cemuLog_log(LogType::Force, "");
|
||||
}
|
||||
}
|
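// Illustrative call (added for context, not part of the original diff): dumping
// every segment of the current translation context, including liveness ranges:
//
//   IMLDebug_Dump(ppcImlGenContext, true);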
536 src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp (new file)
@@ -0,0 +1,536 @@
#include "IMLInstruction.h"
|
||||
#include "IML.h"
|
||||
|
||||
#include "../PPCRecompiler.h"
|
||||
#include "../PPCRecompilerIml.h"
|
||||
|
||||
// return true if an instruction has side effects on top of just reading and writing registers
|
||||
bool IMLInstruction::HasSideEffects() const
|
||||
{
|
||||
bool hasSideEffects = true;
|
||||
if(type == PPCREC_IML_TYPE_R_R || type == PPCREC_IML_TYPE_R_R_S32 || type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32)
|
||||
hasSideEffects = false;
|
||||
// todo - add more cases
|
||||
return hasSideEffects;
|
||||
}
|
||||
|
||||
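// Illustrative sketch (not from the original source): a dead-code elimination
// pass would combine HasSideEffects() with CheckRegisterUsage() roughly like
// this, dropping an instruction when it has no side effects and none of its
// written registers are live afterwards. IsLiveAfter() is a hypothetical
// liveness query, not an API of this codebase.
//
//   IMLUsedRegisters used;
//   inst.CheckRegisterUsage(&used);
//   bool anyLiveWrite = false;
//   used.ForEachWrittenGPR([&](const IMLReg& r) { anyLiveWrite |= IsLiveAfter(r); });
//   if (!inst.HasSideEffects() && !anyLiveWrite)
//       inst.make_no_op();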
void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
{
	registersUsed->readGPR1 = IMLREG_INVALID;
	registersUsed->readGPR2 = IMLREG_INVALID;
	registersUsed->readGPR3 = IMLREG_INVALID;
	registersUsed->readGPR4 = IMLREG_INVALID;
	registersUsed->writtenGPR1 = IMLREG_INVALID;
	registersUsed->writtenGPR2 = IMLREG_INVALID;
	if (type == PPCREC_IML_TYPE_R_NAME)
	{
		registersUsed->writtenGPR1 = op_r_name.regR;
	}
	else if (type == PPCREC_IML_TYPE_NAME_R)
	{
		registersUsed->readGPR1 = op_r_name.regR;
	}
	else if (type == PPCREC_IML_TYPE_R_R)
	{
		if (operation == PPCREC_IML_OP_X86_CMP)
		{
			// both operands are read only
			registersUsed->readGPR1 = op_r_r.regR;
			registersUsed->readGPR2 = op_r_r.regA;
		}
		else if (
			operation == PPCREC_IML_OP_ASSIGN ||
			operation == PPCREC_IML_OP_ENDIAN_SWAP ||
			operation == PPCREC_IML_OP_CNTLZW ||
			operation == PPCREC_IML_OP_NOT ||
			operation == PPCREC_IML_OP_NEG ||
			operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32 ||
			operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32)
		{
			// result is written, operand is read
			registersUsed->writtenGPR1 = op_r_r.regR;
			registersUsed->readGPR1 = op_r_r.regA;
		}
		else
			cemu_assert_unimplemented();
	}
	else if (type == PPCREC_IML_TYPE_R_S32)
	{
		cemu_assert_debug(operation != PPCREC_IML_OP_ADD &&
			operation != PPCREC_IML_OP_SUB &&
			operation != PPCREC_IML_OP_AND &&
			operation != PPCREC_IML_OP_OR &&
			operation != PPCREC_IML_OP_XOR); // deprecated, use r_r_s32 for these

		if (operation == PPCREC_IML_OP_LEFT_ROTATE)
		{
			// register operand is read and write
			registersUsed->readGPR1 = op_r_immS32.regR;
			registersUsed->writtenGPR1 = op_r_immS32.regR;
		}
		else if (operation == PPCREC_IML_OP_X86_CMP)
		{
			// register operand is read only
			registersUsed->readGPR1 = op_r_immS32.regR;
		}
		else
		{
			// register operand is write only
			// todo - use explicit lists, avoid default cases
			registersUsed->writtenGPR1 = op_r_immS32.regR;
		}
	}
	else if (type == PPCREC_IML_TYPE_R_R_S32)
	{
		registersUsed->writtenGPR1 = op_r_r_s32.regR;
		registersUsed->readGPR1 = op_r_r_s32.regA;
	}
	else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY)
	{
		registersUsed->writtenGPR1 = op_r_r_s32_carry.regR;
		registersUsed->readGPR1 = op_r_r_s32_carry.regA;
		// some operations read carry
		switch (operation)
		{
		case PPCREC_IML_OP_ADD_WITH_CARRY:
			registersUsed->readGPR2 = op_r_r_s32_carry.regCarry;
			break;
		case PPCREC_IML_OP_ADD:
			break;
		default:
			cemu_assert_unimplemented();
		}
		// carry is always written
		registersUsed->writtenGPR2 = op_r_r_s32_carry.regCarry;
	}
	else if (type == PPCREC_IML_TYPE_R_R_R)
	{
		// in all cases result is written and other operands are read only
		// with the exception of XOR, where if regA == regB then all bits are zeroed out. So we don't consider it a read
		registersUsed->writtenGPR1 = op_r_r_r.regR;
		if(!(operation == PPCREC_IML_OP_XOR && op_r_r_r.regA == op_r_r_r.regB))
		{
			registersUsed->readGPR1 = op_r_r_r.regA;
			registersUsed->readGPR2 = op_r_r_r.regB;
		}
	}
	else if (type == PPCREC_IML_TYPE_R_R_R_CARRY)
	{
		registersUsed->writtenGPR1 = op_r_r_r_carry.regR;
		registersUsed->readGPR1 = op_r_r_r_carry.regA;
		registersUsed->readGPR2 = op_r_r_r_carry.regB;
		// some operations read carry
		switch (operation)
		{
		case PPCREC_IML_OP_ADD_WITH_CARRY:
			registersUsed->readGPR3 = op_r_r_r_carry.regCarry;
			break;
		case PPCREC_IML_OP_ADD:
			break;
		default:
			cemu_assert_unimplemented();
		}
		// carry is always written
		registersUsed->writtenGPR2 = op_r_r_r_carry.regCarry;
	}
	else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
	{
		// no effect on registers
	}
	else if (type == PPCREC_IML_TYPE_NO_OP)
	{
		// no effect on registers
	}
	else if (type == PPCREC_IML_TYPE_MACRO)
	{
		if (operation == PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_COUNT_CYCLES || operation == PPCREC_IML_MACRO_HLE)
		{
			// no effect on registers
		}
		else if (operation == PPCREC_IML_MACRO_B_TO_REG)
		{
			cemu_assert_debug(op_macro.paramReg.IsValid());
			registersUsed->readGPR1 = op_macro.paramReg;
		}
		else
			cemu_assert_unimplemented();
	}
	else if (type == PPCREC_IML_TYPE_COMPARE)
	{
		registersUsed->readGPR1 = op_compare.regA;
		registersUsed->readGPR2 = op_compare.regB;
		registersUsed->writtenGPR1 = op_compare.regR;
	}
	else if (type == PPCREC_IML_TYPE_COMPARE_S32)
	{
		registersUsed->readGPR1 = op_compare_s32.regA;
		registersUsed->writtenGPR1 = op_compare_s32.regR;
	}
	else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
	{
		registersUsed->readGPR1 = op_conditional_jump.registerBool;
	}
	else if (type == PPCREC_IML_TYPE_JUMP)
	{
		// no registers affected
	}
	else if (type == PPCREC_IML_TYPE_LOAD)
	{
		registersUsed->writtenGPR1 = op_storeLoad.registerData;
		if (op_storeLoad.registerMem.IsValid())
			registersUsed->readGPR1 = op_storeLoad.registerMem;
	}
	else if (type == PPCREC_IML_TYPE_LOAD_INDEXED)
	{
		registersUsed->writtenGPR1 = op_storeLoad.registerData;
		if (op_storeLoad.registerMem.IsValid())
			registersUsed->readGPR1 = op_storeLoad.registerMem;
		if (op_storeLoad.registerMem2.IsValid())
			registersUsed->readGPR2 = op_storeLoad.registerMem2;
	}
	else if (type == PPCREC_IML_TYPE_STORE)
	{
		registersUsed->readGPR1 = op_storeLoad.registerData;
		if (op_storeLoad.registerMem.IsValid())
			registersUsed->readGPR2 = op_storeLoad.registerMem;
	}
	else if (type == PPCREC_IML_TYPE_STORE_INDEXED)
	{
		registersUsed->readGPR1 = op_storeLoad.registerData;
		if (op_storeLoad.registerMem.IsValid())
			registersUsed->readGPR2 = op_storeLoad.registerMem;
		if (op_storeLoad.registerMem2.IsValid())
			registersUsed->readGPR3 = op_storeLoad.registerMem2;
	}
	else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
	{
		registersUsed->readGPR1 = op_atomic_compare_store.regEA;
		registersUsed->readGPR2 = op_atomic_compare_store.regCompareValue;
		registersUsed->readGPR3 = op_atomic_compare_store.regWriteValue;
		registersUsed->writtenGPR1 = op_atomic_compare_store.regBoolOut;
	}
	else if (type == PPCREC_IML_TYPE_CALL_IMM)
	{
		if (op_call_imm.regParam0.IsValid())
			registersUsed->readGPR1 = op_call_imm.regParam0;
		if (op_call_imm.regParam1.IsValid())
			registersUsed->readGPR2 = op_call_imm.regParam1;
		if (op_call_imm.regParam2.IsValid())
			registersUsed->readGPR3 = op_call_imm.regParam2;
		registersUsed->writtenGPR1 = op_call_imm.regReturn;
	}
	else if (type == PPCREC_IML_TYPE_FPR_LOAD)
	{
		// fpr load operation
		registersUsed->writtenGPR1 = op_storeLoad.registerData;
		// address is in gpr register
		if (op_storeLoad.registerMem.IsValid())
			registersUsed->readGPR1 = op_storeLoad.registerMem;
	}
	else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
	{
		// fpr load operation
		registersUsed->writtenGPR1 = op_storeLoad.registerData;
		// address is in gpr registers
		if (op_storeLoad.registerMem.IsValid())
			registersUsed->readGPR1 = op_storeLoad.registerMem;
		if (op_storeLoad.registerMem2.IsValid())
			registersUsed->readGPR2 = op_storeLoad.registerMem2;
	}
	else if (type == PPCREC_IML_TYPE_FPR_STORE)
	{
		// fpr store operation
		registersUsed->readGPR1 = op_storeLoad.registerData;
		if (op_storeLoad.registerMem.IsValid())
			registersUsed->readGPR2 = op_storeLoad.registerMem;
	}
	else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
	{
		// fpr store operation
		registersUsed->readGPR1 = op_storeLoad.registerData;
		// address is in gpr registers
		if (op_storeLoad.registerMem.IsValid())
			registersUsed->readGPR2 = op_storeLoad.registerMem;
		if (op_storeLoad.registerMem2.IsValid())
			registersUsed->readGPR3 = op_storeLoad.registerMem2;
	}
	else if (type == PPCREC_IML_TYPE_FPR_R_R)
	{
		// fpr operation
		if (
			operation == PPCREC_IML_OP_FPR_ASSIGN ||
			operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64 ||
			operation == PPCREC_IML_OP_FPR_FCTIWZ
			)
		{
			registersUsed->readGPR1 = op_fpr_r_r.regA;
			registersUsed->writtenGPR1 = op_fpr_r_r.regR;
		}
		else if (operation == PPCREC_IML_OP_FPR_MULTIPLY ||
			operation == PPCREC_IML_OP_FPR_DIVIDE ||
			operation == PPCREC_IML_OP_FPR_ADD ||
			operation == PPCREC_IML_OP_FPR_SUB)
		{
			registersUsed->readGPR1 = op_fpr_r_r.regA;
			registersUsed->readGPR2 = op_fpr_r_r.regR;
			registersUsed->writtenGPR1 = op_fpr_r_r.regR;
		}
		else if (operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT ||
			operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT ||
			operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
		{
			registersUsed->writtenGPR1 = op_fpr_r_r.regR;
			registersUsed->readGPR1 = op_fpr_r_r.regA;
		}
		else
			cemu_assert_unimplemented();
	}
	else if (type == PPCREC_IML_TYPE_FPR_R_R_R)
	{
		// fpr operation
		registersUsed->readGPR1 = op_fpr_r_r_r.regA;
		registersUsed->readGPR2 = op_fpr_r_r_r.regB;
		registersUsed->writtenGPR1 = op_fpr_r_r_r.regR;
	}
	else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R)
	{
		// fpr operation
		registersUsed->readGPR1 = op_fpr_r_r_r_r.regA;
		registersUsed->readGPR2 = op_fpr_r_r_r_r.regB;
		registersUsed->readGPR3 = op_fpr_r_r_r_r.regC;
		registersUsed->writtenGPR1 = op_fpr_r_r_r_r.regR;
	}
	else if (type == PPCREC_IML_TYPE_FPR_R)
	{
		// fpr operation
		if (operation == PPCREC_IML_OP_FPR_NEGATE ||
			operation == PPCREC_IML_OP_FPR_ABS ||
			operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS ||
			operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64 ||
			operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM)
		{
			registersUsed->readGPR1 = op_fpr_r.regR;
			registersUsed->writtenGPR1 = op_fpr_r.regR;
		}
		else if (operation == PPCREC_IML_OP_FPR_LOAD_ONE)
		{
			registersUsed->writtenGPR1 = op_fpr_r.regR;
		}
		else
			cemu_assert_unimplemented();
	}
	else if (type == PPCREC_IML_TYPE_FPR_COMPARE)
	{
		registersUsed->writtenGPR1 = op_fpr_compare.regR;
		registersUsed->readGPR1 = op_fpr_compare.regA;
		registersUsed->readGPR2 = op_fpr_compare.regB;
	}
	else if (type == PPCREC_IML_TYPE_X86_EFLAGS_JCC)
	{
		// no registers read or written (except for the implicit eflags)
	}
	else
	{
		cemu_assert_unimplemented();
	}
}

IMLReg replaceRegisterIdMultiple(IMLReg reg, const std::unordered_map<IMLRegID, IMLRegID>& translationTable)
{
	if (reg.IsInvalid())
		return reg;
	const auto& it = translationTable.find(reg.GetRegID());
	cemu_assert_debug(it != translationTable.cend());
	IMLReg alteredReg = reg;
	alteredReg.SetRegID(it->second);
	return alteredReg;
}

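// Illustrative usage (assumed, not from the original source): renaming virtual
// register id 5 to id 9 in a single instruction. Every register id the
// instruction might reference must be present in the table, since
// replaceRegisterIdMultiple() asserts when a lookup fails; unaffected ids are
// simply mapped to themselves.
//
//   std::unordered_map<IMLRegID, IMLRegID> table = {{5, 9}, {3, 3}, {4, 4}};
//   inst.RewriteGPR(table);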
void IMLInstruction::RewriteGPR(const std::unordered_map<IMLRegID, IMLRegID>& translationTable)
{
	if (type == PPCREC_IML_TYPE_R_NAME)
	{
		op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_NAME_R)
	{
		op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_R_R)
	{
		op_r_r.regR = replaceRegisterIdMultiple(op_r_r.regR, translationTable);
		op_r_r.regA = replaceRegisterIdMultiple(op_r_r.regA, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_R_S32)
	{
		op_r_immS32.regR = replaceRegisterIdMultiple(op_r_immS32.regR, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_R_R_S32)
	{
		op_r_r_s32.regR = replaceRegisterIdMultiple(op_r_r_s32.regR, translationTable);
		op_r_r_s32.regA = replaceRegisterIdMultiple(op_r_r_s32.regA, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY)
	{
		op_r_r_s32_carry.regR = replaceRegisterIdMultiple(op_r_r_s32_carry.regR, translationTable);
		op_r_r_s32_carry.regA = replaceRegisterIdMultiple(op_r_r_s32_carry.regA, translationTable);
		op_r_r_s32_carry.regCarry = replaceRegisterIdMultiple(op_r_r_s32_carry.regCarry, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_R_R_R)
	{
		op_r_r_r.regR = replaceRegisterIdMultiple(op_r_r_r.regR, translationTable);
		op_r_r_r.regA = replaceRegisterIdMultiple(op_r_r_r.regA, translationTable);
		op_r_r_r.regB = replaceRegisterIdMultiple(op_r_r_r.regB, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_R_R_R_CARRY)
	{
		op_r_r_r_carry.regR = replaceRegisterIdMultiple(op_r_r_r_carry.regR, translationTable);
		op_r_r_r_carry.regA = replaceRegisterIdMultiple(op_r_r_r_carry.regA, translationTable);
		op_r_r_r_carry.regB = replaceRegisterIdMultiple(op_r_r_r_carry.regB, translationTable);
		op_r_r_r_carry.regCarry = replaceRegisterIdMultiple(op_r_r_r_carry.regCarry, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_COMPARE)
	{
		op_compare.regR = replaceRegisterIdMultiple(op_compare.regR, translationTable);
		op_compare.regA = replaceRegisterIdMultiple(op_compare.regA, translationTable);
		op_compare.regB = replaceRegisterIdMultiple(op_compare.regB, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_COMPARE_S32)
	{
		op_compare_s32.regR = replaceRegisterIdMultiple(op_compare_s32.regR, translationTable);
		op_compare_s32.regA = replaceRegisterIdMultiple(op_compare_s32.regA, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
	{
		op_conditional_jump.registerBool = replaceRegisterIdMultiple(op_conditional_jump.registerBool, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP)
	{
		// no effect on registers
	}
	else if (type == PPCREC_IML_TYPE_NO_OP)
	{
		// no effect on registers
	}
	else if (type == PPCREC_IML_TYPE_MACRO)
	{
		if (operation == PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_HLE || operation == PPCREC_IML_MACRO_COUNT_CYCLES)
		{
			// no effect on registers
		}
		else if (operation == PPCREC_IML_MACRO_B_TO_REG)
		{
			op_macro.paramReg = replaceRegisterIdMultiple(op_macro.paramReg, translationTable);
		}
		else
		{
			cemu_assert_unimplemented();
		}
	}
	else if (type == PPCREC_IML_TYPE_LOAD)
	{
		op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
		if (op_storeLoad.registerMem.IsValid())
		{
			op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
		}
	}
	else if (type == PPCREC_IML_TYPE_LOAD_INDEXED)
	{
		op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
		if (op_storeLoad.registerMem.IsValid())
			op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
		if (op_storeLoad.registerMem2.IsValid())
			op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_STORE)
	{
		op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
		if (op_storeLoad.registerMem.IsValid())
			op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_STORE_INDEXED)
	{
		op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
		if (op_storeLoad.registerMem.IsValid())
			op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
		if (op_storeLoad.registerMem2.IsValid())
			op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
	{
		op_atomic_compare_store.regEA = replaceRegisterIdMultiple(op_atomic_compare_store.regEA, translationTable);
		op_atomic_compare_store.regCompareValue = replaceRegisterIdMultiple(op_atomic_compare_store.regCompareValue, translationTable);
		op_atomic_compare_store.regWriteValue = replaceRegisterIdMultiple(op_atomic_compare_store.regWriteValue, translationTable);
		op_atomic_compare_store.regBoolOut = replaceRegisterIdMultiple(op_atomic_compare_store.regBoolOut, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_CALL_IMM)
	{
		op_call_imm.regReturn = replaceRegisterIdMultiple(op_call_imm.regReturn, translationTable);
		if (op_call_imm.regParam0.IsValid())
			op_call_imm.regParam0 = replaceRegisterIdMultiple(op_call_imm.regParam0, translationTable);
		if (op_call_imm.regParam1.IsValid())
			op_call_imm.regParam1 = replaceRegisterIdMultiple(op_call_imm.regParam1, translationTable);
		if (op_call_imm.regParam2.IsValid())
			op_call_imm.regParam2 = replaceRegisterIdMultiple(op_call_imm.regParam2, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_FPR_LOAD)
	{
		op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
		op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
	{
		op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
		op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
		op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_FPR_STORE)
	{
		op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
		op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
	{
		op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
		op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
		op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_FPR_R)
	{
		op_fpr_r.regR = replaceRegisterIdMultiple(op_fpr_r.regR, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_FPR_R_R)
	{
		op_fpr_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r.regR, translationTable);
		op_fpr_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r.regA, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_FPR_R_R_R)
	{
		op_fpr_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r.regR, translationTable);
		op_fpr_r_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r_r.regA, translationTable);
		op_fpr_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r.regB, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R)
	{
		op_fpr_r_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regR, translationTable);
		op_fpr_r_r_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regA, translationTable);
		op_fpr_r_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regB, translationTable);
		op_fpr_r_r_r_r.regC = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regC, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_FPR_COMPARE)
	{
		op_fpr_compare.regA = replaceRegisterIdMultiple(op_fpr_compare.regA, translationTable);
		op_fpr_compare.regB = replaceRegisterIdMultiple(op_fpr_compare.regB, translationTable);
		op_fpr_compare.regR = replaceRegisterIdMultiple(op_fpr_compare.regR, translationTable);
	}
	else if (type == PPCREC_IML_TYPE_X86_EFLAGS_JCC)
	{
		// no registers read or written (except for the implicit eflags)
	}
	else
	{
		cemu_assert_unimplemented();
	}
}
826 src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h (new file)
@@ -0,0 +1,826 @@
#pragma once

using IMLRegID = uint16; // 16 bit ID
using IMLPhysReg = sint32; // arbitrary value that is up to the architecture backend, usually this will be the register index. A value of -1 is reserved and means not assigned

// format of IMLReg:
// 0-15  (16 bit) IMLRegID
// 19-23 (5 bit)  Offset in elements, for SIMD registers
// 24-27 (4 bit)  IMLRegFormat RegFormat
// 28-31 (4 bit)  IMLRegFormat BaseFormat

enum class IMLRegFormat : uint8
{
	INVALID_FORMAT,
	I64,
	I32,
	I16,
	I8,
	// I1 ?
	F64,
	F32,
	TYPE_COUNT,
};

class IMLReg
{
public:
	IMLReg()
	{
		m_raw = 0; // 0 is invalid
	}

	IMLReg(IMLRegFormat baseRegFormat, IMLRegFormat regFormat, uint8 viewOffset, IMLRegID regId)
	{
		m_raw = 0;
		m_raw |= ((uint8)baseRegFormat << 28);
		m_raw |= ((uint8)regFormat << 24);
		m_raw |= (uint32)regId;
	}

	IMLReg(IMLReg&& baseReg, IMLRegFormat viewFormat, uint8 viewOffset, IMLRegID regId)
	{
		DEBUG_BREAK;
		//m_raw = 0;
		//m_raw |= ((uint8)baseRegFormat << 28);
		//m_raw |= ((uint8)viewFormat << 24);
		//m_raw |= (uint32)regId;
	}

	IMLReg(const IMLReg& other) : m_raw(other.m_raw) {}

	IMLRegFormat GetBaseFormat() const
	{
		return (IMLRegFormat)((m_raw >> 28) & 0xF);
	}

	IMLRegFormat GetRegFormat() const
	{
		return (IMLRegFormat)((m_raw >> 24) & 0xF);
	}

	IMLRegID GetRegID() const
	{
		cemu_assert_debug(GetBaseFormat() != IMLRegFormat::INVALID_FORMAT);
		cemu_assert_debug(GetRegFormat() != IMLRegFormat::INVALID_FORMAT);
		return (IMLRegID)(m_raw & 0xFFFF);
	}

	void SetRegID(IMLRegID regId)
	{
		cemu_assert_debug(regId <= 0xFFFF);
		m_raw &= ~0xFFFF;
		m_raw |= (uint32)regId;
	}

	bool IsInvalid() const
	{
		return GetBaseFormat() == IMLRegFormat::INVALID_FORMAT;
	}

	bool IsValid() const
	{
		return GetBaseFormat() != IMLRegFormat::INVALID_FORMAT;
	}

	bool IsValidAndSameRegID(IMLRegID regId) const
	{
		return IsValid() && GetRegID() == regId;
	}

	// compare all fields
	bool operator==(const IMLReg& other) const
	{
		return m_raw == other.m_raw;
	}

private:
	uint32 m_raw;
};

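// Illustrative sketch (added for clarity, not part of the original header):
// how the bit layout documented above behaves for a concrete value.
//
//   IMLReg r(IMLRegFormat::I64, IMLRegFormat::I32, 0, 5);
//   r.GetBaseFormat(); // IMLRegFormat::I64, decoded from bits 28-31
//   r.GetRegFormat();  // IMLRegFormat::I32, decoded from bits 24-27
//   r.GetRegID();      // 5, decoded from bits 0-15
//   r.SetRegID(9);     // only bits 0-15 change, both formats are preserved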
static const IMLReg IMLREG_INVALID(IMLRegFormat::INVALID_FORMAT, IMLRegFormat::INVALID_FORMAT, 0, 0);
static const IMLRegID IMLRegID_INVALID(0xFFFF);

using IMLName = uint32;

enum
{
	PPCREC_IML_OP_ASSIGN, // '=' operator
	PPCREC_IML_OP_ENDIAN_SWAP, // '=' operator with 32bit endian swap
	PPCREC_IML_OP_MULTIPLY_SIGNED, // '*' operator (signed multiply)
	PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, // unsigned 64bit multiply, store only high 32bit-word of result
	PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, // signed 64bit multiply, store only high 32bit-word of result
	PPCREC_IML_OP_DIVIDE_SIGNED, // '/' operator (signed divide)
	PPCREC_IML_OP_DIVIDE_UNSIGNED, // '/' operator (unsigned divide)

	// binary operation
	PPCREC_IML_OP_OR, // '|' operator
	PPCREC_IML_OP_AND, // '&' operator
	PPCREC_IML_OP_XOR, // '^' operator
	PPCREC_IML_OP_LEFT_ROTATE, // left rotate operator
	PPCREC_IML_OP_LEFT_SHIFT, // shift left operator
	PPCREC_IML_OP_RIGHT_SHIFT_U, // right shift operator (unsigned)
	PPCREC_IML_OP_RIGHT_SHIFT_S, // right shift operator (signed)
	// ppc
	PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits)
	PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits)
	PPCREC_IML_OP_CNTLZW,
	// FPU
	PPCREC_IML_OP_FPR_ASSIGN,
	PPCREC_IML_OP_FPR_LOAD_ONE, // load constant 1.0 into register
	PPCREC_IML_OP_FPR_ADD,
	PPCREC_IML_OP_FPR_SUB,
	PPCREC_IML_OP_FPR_MULTIPLY,
	PPCREC_IML_OP_FPR_DIVIDE,
	PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, // expand f32 to f64 in-place
	PPCREC_IML_OP_FPR_NEGATE,
	PPCREC_IML_OP_FPR_ABS, // abs(fpr)
	PPCREC_IML_OP_FPR_NEGATIVE_ABS, // -abs(fpr)
	PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, // round 64bit double to 64bit double with 32bit float precision (in bottom half of xmm register)
	PPCREC_IML_OP_FPR_FCTIWZ,
	PPCREC_IML_OP_FPR_SELECT, // selectively copy bottom value from operand B or C based on value in operand A
	// Conversion (FPR_R_R)
	PPCREC_IML_OP_FPR_INT_TO_FLOAT, // convert integer value in gpr to floating point value in fpr
	PPCREC_IML_OP_FPR_FLOAT_TO_INT, // convert floating point value in fpr to integer value in gpr

	// Bitcast (FPR_R_R)
	PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT,

	// R_R_R + R_R_S32
	PPCREC_IML_OP_ADD, // also R_R_R_CARRY
	PPCREC_IML_OP_SUB,

	// R_R only
	PPCREC_IML_OP_NOT,
	PPCREC_IML_OP_NEG,
	PPCREC_IML_OP_ASSIGN_S16_TO_S32,
	PPCREC_IML_OP_ASSIGN_S8_TO_S32,

	// R_R_R_carry
	PPCREC_IML_OP_ADD_WITH_CARRY, // similar to ADD but also adds carry bit (0 or 1)

	// X86 extension
	PPCREC_IML_OP_X86_CMP, // R_R and R_S32

	PPCREC_IML_OP_INVALID
};

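// Illustrative sketch (assumed, not from the original header): a PPC
// "adde rD, rA, rB" (add with carry-in and carry-out) maps naturally onto the
// carry-aware variant above via the setter declared further down, where regCa
// is the register holding the XER carry bit:
//
//   inst.make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regA, regB, regCa);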
#define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN)

enum
{
	PPCREC_IML_MACRO_B_TO_REG, // branch to PPC address in register (used for BCCTR, BCLR)

	PPCREC_IML_MACRO_BL, // call to different function (can be within same function)
	PPCREC_IML_MACRO_B_FAR, // branch to different function
	PPCREC_IML_MACRO_COUNT_CYCLES, // decrease current remaining thread cycles by a certain amount
	PPCREC_IML_MACRO_HLE, // HLE function call
	PPCREC_IML_MACRO_LEAVE, // leaves recompiler and switches to interpreter
	// debugging
	PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak
};

enum class IMLCondition : uint8
{
	EQ,
	NEQ,
	SIGNED_GT,
	SIGNED_LT,
	UNSIGNED_GT,
	UNSIGNED_LT,

	// floating point conditions
	UNORDERED_GT, // a > b, false if either is NaN
	UNORDERED_LT, // a < b, false if either is NaN
	UNORDERED_EQ, // a == b, false if either is NaN
	UNORDERED_U, // unordered (true if either operand is NaN)

	ORDERED_GT,
	ORDERED_LT,
	ORDERED_EQ,
	ORDERED_U
};

enum
{
	PPCREC_IML_TYPE_NONE,
	PPCREC_IML_TYPE_NO_OP, // no-op instruction
	PPCREC_IML_TYPE_R_R, // r* = (op) *r (can also be r* (op) *r)
	PPCREC_IML_TYPE_R_R_R, // r* = r* (op) r*
	PPCREC_IML_TYPE_R_R_R_CARRY, // r* = r* (op) r* (reads and/or updates carry)
	PPCREC_IML_TYPE_R_R_S32, // r* = r* (op) s32*
	PPCREC_IML_TYPE_R_R_S32_CARRY, // r* = r* (op) s32* (reads and/or updates carry)
	PPCREC_IML_TYPE_LOAD, // r* = [r*+s32*]
	PPCREC_IML_TYPE_LOAD_INDEXED, // r* = [r*+r*]
	PPCREC_IML_TYPE_STORE, // [r*+s32*] = r*
	PPCREC_IML_TYPE_STORE_INDEXED, // [r*+r*] = r*
	PPCREC_IML_TYPE_R_NAME, // r* = name
	PPCREC_IML_TYPE_NAME_R, // name* = r*
	PPCREC_IML_TYPE_R_S32, // r* (op) imm
	PPCREC_IML_TYPE_MACRO,
	PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles < 0

	// conditions and branches
	PPCREC_IML_TYPE_COMPARE, // r* = r* CMP[cond] r*
	PPCREC_IML_TYPE_COMPARE_S32, // r* = r* CMP[cond] imm
	PPCREC_IML_TYPE_JUMP, // jump always
	PPCREC_IML_TYPE_CONDITIONAL_JUMP, // jump conditionally based on boolean value in register

	// atomic
	PPCREC_IML_TYPE_ATOMIC_CMP_STORE,

	// function call
	PPCREC_IML_TYPE_CALL_IMM, // call to fixed immediate address

	// FPR
	PPCREC_IML_TYPE_FPR_LOAD, // r* = (bitdepth) [r*+s32*] (single or paired single mode)
	PPCREC_IML_TYPE_FPR_LOAD_INDEXED, // r* = (bitdepth) [r*+r*] (single or paired single mode)
	PPCREC_IML_TYPE_FPR_STORE, // (bitdepth) [r*+s32*] = r* (single or paired single mode)
	PPCREC_IML_TYPE_FPR_STORE_INDEXED, // (bitdepth) [r*+r*] = r* (single or paired single mode)
	PPCREC_IML_TYPE_FPR_R_R,
	PPCREC_IML_TYPE_FPR_R_R_R,
	PPCREC_IML_TYPE_FPR_R_R_R_R,
	PPCREC_IML_TYPE_FPR_R,

	PPCREC_IML_TYPE_FPR_COMPARE, // r* = r* CMP[cond] r*

	// X86 specific
	PPCREC_IML_TYPE_X86_EFLAGS_JCC,
};

enum // IMLName
{
	PPCREC_NAME_NONE,
	PPCREC_NAME_TEMPORARY = 1000,
	PPCREC_NAME_R0 = 2000,
	PPCREC_NAME_SPR0 = 3000,
	PPCREC_NAME_FPR_HALF = 4800, // counts PS0 and PS1 separately. E.g. fp3.ps1 is at offset 3 * 2 + 1
	PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7
	PPCREC_NAME_XER_CA = 6000, // carry bit from XER
	PPCREC_NAME_XER_OV = 6001, // overflow bit from XER
	PPCREC_NAME_XER_SO = 6002, // summary overflow bit from XER
	PPCREC_NAME_CR = 7000, // CR register bits (31 to 0)
	PPCREC_NAME_CR_LAST = PPCREC_NAME_CR + 31,
	PPCREC_NAME_CPU_MEMRES_EA = 8000,
	PPCREC_NAME_CPU_MEMRES_VAL = 8001
};

#define PPC_REC_INVALID_REGISTER 0xFF // deprecated. Use IMLREG_INVALID instead

enum
{
	// fpr load
	PPCREC_FPR_LD_MODE_SINGLE,
	PPCREC_FPR_LD_MODE_DOUBLE,

	// fpr store
	PPCREC_FPR_ST_MODE_SINGLE,
	PPCREC_FPR_ST_MODE_DOUBLE,

	PPCREC_FPR_ST_MODE_UI32_FROM_PS0, // store raw low-32bit of PS0
};

struct IMLUsedRegisters
{
	IMLUsedRegisters() {};

	bool IsWrittenByRegId(IMLRegID regId) const
	{
		if (writtenGPR1.IsValid() && writtenGPR1.GetRegID() == regId)
			return true;
		if (writtenGPR2.IsValid() && writtenGPR2.GetRegID() == regId)
			return true;
		return false;
	}

	bool IsBaseGPRWritten(IMLReg imlReg) const
	{
		cemu_assert_debug(imlReg.IsValid());
		auto regId = imlReg.GetRegID();
		return IsWrittenByRegId(regId);
	}

	template<typename Fn>
	void ForEachWrittenGPR(Fn F) const
	{
		if (writtenGPR1.IsValid())
			F(writtenGPR1);
		if (writtenGPR2.IsValid())
			F(writtenGPR2);
	}

	template<typename Fn>
	void ForEachReadGPR(Fn F) const
	{
		if (readGPR1.IsValid())
			F(readGPR1);
		if (readGPR2.IsValid())
			F(readGPR2);
		if (readGPR3.IsValid())
			F(readGPR3);
		if (readGPR4.IsValid())
			F(readGPR4);
	}

	template<typename Fn>
	void ForEachAccessedGPR(Fn F) const
	{
		// GPRs
		if (readGPR1.IsValid())
			F(readGPR1, false);
		if (readGPR2.IsValid())
			F(readGPR2, false);
		if (readGPR3.IsValid())
			F(readGPR3, false);
		if (readGPR4.IsValid())
			F(readGPR4, false);
		if (writtenGPR1.IsValid())
			F(writtenGPR1, true);
		if (writtenGPR2.IsValid())
			F(writtenGPR2, true);
	}

	IMLReg readGPR1;
	IMLReg readGPR2;
	IMLReg readGPR3;
	IMLReg readGPR4;
	IMLReg writtenGPR1;
	IMLReg writtenGPR2;
};

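// Illustrative usage (not from the original header): collecting every register
// id an instruction touches via the accessor helpers above; the bool parameter
// distinguishes written registers from read ones.
//
//   IMLUsedRegisters used;
//   inst.CheckRegisterUsage(&used);
//   std::vector<IMLRegID> ids;
//   used.ForEachAccessedGPR([&](const IMLReg& r, bool isWritten) {
//       ids.push_back(r.GetRegID());
//   });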
struct IMLInstruction
{
	IMLInstruction() {}
	IMLInstruction(const IMLInstruction& other)
	{
		memcpy(this, &other, sizeof(IMLInstruction));
	}

	uint8 type;
	uint8 operation;
	union
	{
		struct
		{
			uint8 _padding[7];
		} padding;
		struct
		{
			IMLReg regR;
			IMLReg regA;
		} op_r_r;
		struct
		{
			IMLReg regR;
			IMLReg regA;
			IMLReg regB;
		} op_r_r_r;
		struct
		{
			IMLReg regR;
			IMLReg regA;
			IMLReg regB;
			IMLReg regCarry;
		} op_r_r_r_carry;
		struct
		{
			IMLReg regR;
			IMLReg regA;
			sint32 immS32;
		} op_r_r_s32;
		struct
		{
			IMLReg regR;
			IMLReg regA;
			IMLReg regCarry;
			sint32 immS32;
		} op_r_r_s32_carry;
		struct
		{
			IMLReg regR;
			IMLName name;
		} op_r_name; // alias op_name_r
		struct
		{
			IMLReg regR;
			sint32 immS32;
		} op_r_immS32;
		struct
		{
			uint32 param;
			uint32 param2;
			uint16 paramU16;
			IMLReg paramReg;
		} op_macro;
		struct
		{
			IMLReg registerData;
			IMLReg registerMem;
			IMLReg registerMem2;
			uint8 copyWidth;
			struct
			{
				bool swapEndian : 1;
				bool signExtend : 1;
				bool notExpanded : 1; // for floats
			} flags2;
			uint8 mode; // transfer mode
			sint32 immS32;
		} op_storeLoad;
		struct
		{
			uintptr_t callAddress;
			IMLReg regParam0;
			IMLReg regParam1;
			IMLReg regParam2;
			IMLReg regReturn;
		} op_call_imm;
		struct
		{
			IMLReg regR;
			IMLReg regA;
		} op_fpr_r_r;
		struct
		{
			IMLReg regR;
			IMLReg regA;
			IMLReg regB;
		} op_fpr_r_r_r;
		struct
		{
			IMLReg regR;
			IMLReg regA;
			IMLReg regB;
			IMLReg regC;
		} op_fpr_r_r_r_r;
		struct
		{
			IMLReg regR;
		} op_fpr_r;
		struct
		{
			IMLReg regR; // stores the boolean result of the comparison
			IMLReg regA;
			IMLReg regB;
			IMLCondition cond;
		} op_fpr_compare;
		struct
		{
			IMLReg regR; // stores the boolean result of the comparison
			IMLReg regA;
			IMLReg regB;
			IMLCondition cond;
		} op_compare;
		struct
		{
			IMLReg regR; // stores the boolean result of the comparison
			IMLReg regA;
			sint32 immS32;
			IMLCondition cond;
		} op_compare_s32;
		struct
		{
			IMLReg registerBool;
			bool mustBeTrue;
		} op_conditional_jump;
		struct
		{
			IMLReg regEA;
			IMLReg regCompareValue;
			IMLReg regWriteValue;
			IMLReg regBoolOut;
		} op_atomic_compare_store;
		// conditional operations (emitted if supported by target platform)
		struct
		{
			// r_s32
			IMLReg regR;
			sint32 immS32;
			// condition
			uint8 crRegisterIndex;
			uint8 crBitIndex;
			bool bitMustBeSet;
		} op_conditional_r_s32;
		// X86 specific
		struct
		{
			IMLCondition cond;
			bool invertedCondition;
		} op_x86_eflags_jcc;
	};

	bool IsSuffixInstruction() const
	{
		if (type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_BL ||
			type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_B_FAR ||
			type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_B_TO_REG ||
			type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_LEAVE ||
			type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_HLE ||
			type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ||
			type == PPCREC_IML_TYPE_JUMP ||
			type == PPCREC_IML_TYPE_CONDITIONAL_JUMP ||
			type == PPCREC_IML_TYPE_X86_EFLAGS_JCC)
			return true;
		return false;
	}

	// instruction setters
	void make_no_op()
	{
		type = PPCREC_IML_TYPE_NO_OP;
		operation = 0;
	}

	void make_r_name(IMLReg regR, IMLName name)
	{
		cemu_assert_debug(regR.GetBaseFormat() == regR.GetRegFormat()); // for name load/store instructions the register must match the base format
		type = PPCREC_IML_TYPE_R_NAME;
		operation = PPCREC_IML_OP_ASSIGN;
		op_r_name.regR = regR;
		op_r_name.name = name;
	}

	void make_name_r(IMLName name, IMLReg regR)
	{
		cemu_assert_debug(regR.GetBaseFormat() == regR.GetRegFormat()); // for name load/store instructions the register must match the base format
		type = PPCREC_IML_TYPE_NAME_R;
		operation = PPCREC_IML_OP_ASSIGN;
		op_r_name.regR = regR;
		op_r_name.name = name;
	}

	void make_debugbreak(uint32 currentPPCAddress = 0)
	{
		make_macro(PPCREC_IML_MACRO_DEBUGBREAK, 0, currentPPCAddress, 0, IMLREG_INVALID);
	}

	void make_macro(uint32 macroId, uint32 param, uint32 param2, uint16 paramU16, IMLReg regParam)
	{
		this->type = PPCREC_IML_TYPE_MACRO;
		this->operation = macroId;
		this->op_macro.param = param;
		this->op_macro.param2 = param2;
		this->op_macro.paramU16 = paramU16;
		this->op_macro.paramReg = regParam;
	}

	void make_cjump_cycle_check()
	{
		this->type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK;
		this->operation = 0;
	}

	void make_r_r(uint32 operation, IMLReg regR, IMLReg regA)
	{
		this->type = PPCREC_IML_TYPE_R_R;
		this->operation = operation;
		this->op_r_r.regR = regR;
		this->op_r_r.regA = regA;
	}

	void make_r_s32(uint32 operation, IMLReg regR, sint32 immS32)
	{
		this->type = PPCREC_IML_TYPE_R_S32;
		this->operation = operation;
		this->op_r_immS32.regR = regR;
		this->op_r_immS32.immS32 = immS32;
	}

	void make_r_r_r(uint32 operation, IMLReg regR, IMLReg regA, IMLReg regB)
	{
		this->type = PPCREC_IML_TYPE_R_R_R;
		this->operation = operation;
		this->op_r_r_r.regR = regR;
		this->op_r_r_r.regA = regA;
		this->op_r_r_r.regB = regB;
	}

	void make_r_r_r_carry(uint32 operation, IMLReg regR, IMLReg regA, IMLReg regB, IMLReg regCarry)
	{
		this->type = PPCREC_IML_TYPE_R_R_R_CARRY;
		this->operation = operation;
		this->op_r_r_r_carry.regR = regR;
		this->op_r_r_r_carry.regA = regA;
		this->op_r_r_r_carry.regB = regB;
		this->op_r_r_r_carry.regCarry = regCarry;
	}

	void make_r_r_s32(uint32 operation, IMLReg regR, IMLReg regA, sint32 immS32)
	{
		this->type = PPCREC_IML_TYPE_R_R_S32;
		this->operation = operation;
		this->op_r_r_s32.regR = regR;
		this->op_r_r_s32.regA = regA;
		this->op_r_r_s32.immS32 = immS32;
	}

	void make_r_r_s32_carry(uint32 operation, IMLReg regR, IMLReg regA, sint32 immS32, IMLReg regCarry)
	{
		this->type = PPCREC_IML_TYPE_R_R_S32_CARRY;
		this->operation = operation;
		this->op_r_r_s32_carry.regR = regR;
		this->op_r_r_s32_carry.regA = regA;
		this->op_r_r_s32_carry.immS32 = immS32;
		this->op_r_r_s32_carry.regCarry = regCarry;
	}

	void make_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond)
	{
		this->type = PPCREC_IML_TYPE_COMPARE;
		this->operation = PPCREC_IML_OP_INVALID;
		this->op_compare.regR = regR;
		this->op_compare.regA = regA;
		this->op_compare.regB = regB;
		this->op_compare.cond = cond;
	}

	void make_compare_s32(IMLReg regA, sint32 immS32, IMLReg regR, IMLCondition cond)
	{
		this->type = PPCREC_IML_TYPE_COMPARE_S32;
		this->operation = PPCREC_IML_OP_INVALID;
		this->op_compare_s32.regR = regR;
		this->op_compare_s32.regA = regA;
		this->op_compare_s32.immS32 = immS32;
		this->op_compare_s32.cond = cond;
	}

	void make_conditional_jump(IMLReg regBool, bool mustBeTrue)
	{
		this->type = PPCREC_IML_TYPE_CONDITIONAL_JUMP;
		this->operation = PPCREC_IML_OP_INVALID;
		this->op_conditional_jump.registerBool = regBool;
		this->op_conditional_jump.mustBeTrue = mustBeTrue;
	}

	void make_jump()
	{
		this->type = PPCREC_IML_TYPE_JUMP;
		this->operation = PPCREC_IML_OP_INVALID;
	}

	// load from memory
	void make_r_memory(IMLReg regD, IMLReg regMem, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian)
	{
		this->type = PPCREC_IML_TYPE_LOAD;
		this->operation = 0;
		this->op_storeLoad.registerData = regD;
		this->op_storeLoad.registerMem = regMem;
		this->op_storeLoad.immS32 = immS32;
		this->op_storeLoad.copyWidth = copyWidth;
		this->op_storeLoad.flags2.swapEndian = switchEndian;
		this->op_storeLoad.flags2.signExtend = signExtend;
	}

	// store to memory
	void make_memory_r(IMLReg regS, IMLReg regMem, sint32 immS32, uint32 copyWidth, bool switchEndian)
	{
		this->type = PPCREC_IML_TYPE_STORE;
		this->operation = 0;
		this->op_storeLoad.registerData = regS;
		this->op_storeLoad.registerMem = regMem;
		this->op_storeLoad.immS32 = immS32;
		this->op_storeLoad.copyWidth = copyWidth;
		this->op_storeLoad.flags2.swapEndian = switchEndian;
		this->op_storeLoad.flags2.signExtend = false;
	}

	void make_atomic_cmp_store(IMLReg regEA, IMLReg regCompareValue, IMLReg regWriteValue, IMLReg regSuccessOutput)
	{
		this->type = PPCREC_IML_TYPE_ATOMIC_CMP_STORE;
		this->operation = 0;
		this->op_atomic_compare_store.regEA = regEA;
		this->op_atomic_compare_store.regCompareValue = regCompareValue;
		this->op_atomic_compare_store.regWriteValue = regWriteValue;
		this->op_atomic_compare_store.regBoolOut = regSuccessOutput;
	}

	void make_call_imm(uintptr_t callAddress, IMLReg param0, IMLReg param1, IMLReg param2, IMLReg regReturn)
	{
		this->type = PPCREC_IML_TYPE_CALL_IMM;
		this->operation = 0;
		this->op_call_imm.callAddress = callAddress;
		this->op_call_imm.regParam0 = param0;
		this->op_call_imm.regParam1 = param1;
		this->op_call_imm.regParam2 = param2;
		this->op_call_imm.regReturn = regReturn;
	}

	// FPR

	// load from memory
	void make_fpr_r_memory(IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian)
	{
		this->type = PPCREC_IML_TYPE_FPR_LOAD;
		this->operation = 0;
		this->op_storeLoad.registerData = registerDestination;
		this->op_storeLoad.registerMem = registerMemory;
		this->op_storeLoad.immS32 = immS32;
		this->op_storeLoad.mode = mode;
		this->op_storeLoad.flags2.swapEndian = switchEndian;
	}

	void make_fpr_r_memory_indexed(IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 mode, bool switchEndian)
	{
		this->type = PPCREC_IML_TYPE_FPR_LOAD_INDEXED;
		this->operation = 0;
		this->op_storeLoad.registerData = registerDestination;
		this->op_storeLoad.registerMem = registerMemory1;
		this->op_storeLoad.registerMem2 = registerMemory2;
		this->op_storeLoad.immS32 = 0;
		this->op_storeLoad.mode = mode;
		this->op_storeLoad.flags2.swapEndian = switchEndian;
	}

	// store to memory
	void make_fpr_memory_r(IMLReg registerSource, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian)
	{
		this->type = PPCREC_IML_TYPE_FPR_STORE;
		this->operation = 0;
		this->op_storeLoad.registerData = registerSource;
		this->op_storeLoad.registerMem = registerMemory;
		this->op_storeLoad.immS32 = immS32;
		this->op_storeLoad.mode = mode;
		this->op_storeLoad.flags2.swapEndian = switchEndian;
	}

	void make_fpr_memory_r_indexed(IMLReg registerSource, IMLReg registerMemory1, IMLReg registerMemory2, sint32 immS32, uint32 mode, bool switchEndian)
	{
		this->type = PPCREC_IML_TYPE_FPR_STORE_INDEXED;
		this->operation = 0;
		this->op_storeLoad.registerData = registerSource;
		this->op_storeLoad.registerMem = registerMemory1;
		this->op_storeLoad.registerMem2 = registerMemory2;
		this->op_storeLoad.immS32 = immS32;
		this->op_storeLoad.mode = mode;
		this->op_storeLoad.flags2.swapEndian = switchEndian;
	}

	void make_fpr_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond)
	{
		this->type = PPCREC_IML_TYPE_FPR_COMPARE;
		this->operation = -999;
		this->op_fpr_compare.regR = regR;
		this->op_fpr_compare.regA = regA;
		this->op_fpr_compare.regB = regB;
		this->op_fpr_compare.cond = cond;
	}

	void make_fpr_r(sint32 operation, IMLReg registerResult)
	{
		// OP (fpr)
		this->type = PPCREC_IML_TYPE_FPR_R;
		this->operation = operation;
		this->op_fpr_r.regR = registerResult;
	}

	void make_fpr_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperand, sint32 crRegister = PPC_REC_INVALID_REGISTER)
	{
		// fpr OP fpr
		this->type = PPCREC_IML_TYPE_FPR_R_R;
		this->operation = operation;
		this->op_fpr_r_r.regR = registerResult;
		this->op_fpr_r_r.regA = registerOperand;
	}

	void make_fpr_r_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperand1, IMLReg registerOperand2, sint32 crRegister = PPC_REC_INVALID_REGISTER)
	{
		// fpr = OP (fpr,fpr)
		this->type = PPCREC_IML_TYPE_FPR_R_R_R;
		this->operation = operation;
		this->op_fpr_r_r_r.regR = registerResult;
		this->op_fpr_r_r_r.regA = registerOperand1;
		this->op_fpr_r_r_r.regB = registerOperand2;
	}

	void make_fpr_r_r_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperandA, IMLReg registerOperandB, IMLReg registerOperandC, sint32 crRegister = PPC_REC_INVALID_REGISTER)
	{
		// fpr = OP (fpr,fpr,fpr)
		this->type = PPCREC_IML_TYPE_FPR_R_R_R_R;
		this->operation = operation;
		this->op_fpr_r_r_r_r.regR = registerResult;
		this->op_fpr_r_r_r_r.regA = registerOperandA;
		this->op_fpr_r_r_r_r.regB = registerOperandB;
		this->op_fpr_r_r_r_r.regC = registerOperandC;
	}

	/* X86 specific */
	void make_x86_eflags_jcc(IMLCondition cond, bool invertedCondition)
	{
		this->type = PPCREC_IML_TYPE_X86_EFLAGS_JCC;
		this->operation = -999;
		this->op_x86_eflags_jcc.cond = cond;
		this->op_x86_eflags_jcc.invertedCondition = invertedCondition;
	}

	void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const;
	bool HasSideEffects() const; // returns true if the instruction has side effects beyond just reading and writing registers. Dead code elimination uses this to know if an instruction can be dropped when the regular register outputs are not used

	void RewriteGPR(const std::unordered_map<IMLRegID, IMLRegID>& translationTable);
};

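// Illustrative usage (assumed, not part of the original header): emitting
// "rD = rA + 100" as IML and then querying its register usage; regD and regA
// stand for previously allocated IMLReg values.
//
//   IMLInstruction inst;
//   inst.make_r_r_s32(PPCREC_IML_OP_ADD, regD, regA, 100);
//   IMLUsedRegisters used;
//   inst.CheckRegisterUsage(&used); // reports regA as read, regD as written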
// architecture specific constants
namespace IMLArchX86
{
	static constexpr int PHYSREG_GPR_BASE = 0;
	static constexpr int PHYSREG_FPR_BASE = 16;
};
719 src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp (new file)
@@ -0,0 +1,719 @@
#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
|
||||
#include "Cafe/HW/Espresso/Recompiler/IML/IML.h"
|
||||
#include "Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h"
|
||||
|
||||
#include "../PPCRecompiler.h"
|
||||
#include "../PPCRecompilerIml.h"
|
||||
#include "../BackendX64/BackendX64.h"
|
||||
|
||||
#include "Common/FileStream.h"
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
IMLReg _FPRRegFromID(IMLRegID regId)
|
||||
{
|
||||
return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, regId);
|
||||
}
|
||||
|
||||
void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg fprReg)
{
	IMLRegID fprIndex = fprReg.GetRegID();

	IMLInstruction* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad;
	if (imlInstructionLoad->op_storeLoad.flags2.notExpanded)
		return;
	boost::container::static_vector<sint32, 4> trackedMoves; // only track up to 4 copies
	IMLUsedRegisters registersUsed;
	sint32 scanRangeEnd = std::min<sint32>(imlIndexLoad + 25, imlSegment->imlList.size()); // don't scan too far (scanning ahead costs performance and the chance to merge the load+store drops with distance)
	bool foundMatch = false;
	sint32 lastStore = -1;
	for (sint32 i = imlIndexLoad + 1; i < scanRangeEnd; i++)
	{
		IMLInstruction* imlInstruction = imlSegment->imlList.data() + i;
		if (imlInstruction->IsSuffixInstruction())
			break;
		// check if FPR is stored
		if ((imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE) ||
			(imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE))
		{
			if (imlInstruction->op_storeLoad.registerData.GetRegID() == fprIndex)
			{
				if (foundMatch == false)
				{
					// flag the load-single instruction as "don't expand" (leave single value as-is)
					imlInstructionLoad->op_storeLoad.flags2.notExpanded = true;
				}
				// also set the flag for the store instruction
				IMLInstruction* imlInstructionStore = imlInstruction;
				imlInstructionStore->op_storeLoad.flags2.notExpanded = true;

				foundMatch = true;
				lastStore = i + 1;

				continue;
			}
		}
		// if the FPR is copied then keep track of it. We can expand the copies instead of the original
		if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R && imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN && imlInstruction->op_fpr_r_r.regA.GetRegID() == fprIndex)
		{
			if (imlInstruction->op_fpr_r_r.regR.GetRegID() == fprIndex)
			{
				// unexpected no-op
				break;
			}
			if (trackedMoves.size() >= trackedMoves.capacity())
			{
				// we can't track any more moves, expand here
				lastStore = i;
				break;
			}
			trackedMoves.push_back(i);
			continue;
		}
		// check if FPR is overwritten
		imlInstruction->CheckRegisterUsage(&registersUsed);
		if (registersUsed.writtenGPR1.IsValidAndSameRegID(fprIndex) || registersUsed.writtenGPR2.IsValidAndSameRegID(fprIndex))
			break;
		if (registersUsed.readGPR1.IsValidAndSameRegID(fprIndex))
			break;
		if (registersUsed.readGPR2.IsValidAndSameRegID(fprIndex))
			break;
		if (registersUsed.readGPR3.IsValidAndSameRegID(fprIndex))
			break;
		if (registersUsed.readGPR4.IsValidAndSameRegID(fprIndex))
			break;
	}

	if (foundMatch)
	{
		// insert expand instructions for each target register of a move
		sint32 positionBias = 0;
		for (auto& trackedMove : trackedMoves)
		{
			sint32 realPosition = trackedMove + positionBias;
			IMLInstruction* imlMoveInstruction = imlSegment->imlList.data() + realPosition;
			if (realPosition >= lastStore)
				break; // expand is inserted before this move
			else
				lastStore++;

			cemu_assert_debug(imlMoveInstruction->type == PPCREC_IML_TYPE_FPR_R_R && imlMoveInstruction->op_fpr_r_r.regA.GetRegID() == fprIndex);
			cemu_assert_debug(imlMoveInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::F64);
			auto dstReg = imlMoveInstruction->op_fpr_r_r.regR;
			IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, realPosition + 1); // one after the move
			newExpand->make_fpr_r(PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, dstReg);
			positionBias++;
		}
		// insert expand instruction after store
		IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, lastStore);
		newExpand->make_fpr_r(PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, _FPRRegFromID(fprIndex));
	}
}

/*
 * Scans for patterns:
 * <Load sp float into register f>
 * <Random unrelated instructions>
 * <Store sp float from register f>
 * For these patterns the store and load are modified to work with un-extended values (float remains as float, no double conversion)
 * The float->double extension is then executed later
 * Advantages:
 * Keeps denormals and other special float values intact
 * Slightly improves performance
 */
void IMLOptimizer_OptimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext)
{
	for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
	{
		for (sint32 i = 0; i < segIt->imlList.size(); i++)
		{
			IMLInstruction* imlInstruction = segIt->imlList.data() + i;
			if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE)
			{
				PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
			}
			else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE)
			{
				PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
			}
		}
	}
}

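// Hypothetical guest-code shape (for illustration only) that this pass targets:
//
//   lfs  f1, 0x10(r3)   ; load single  -> would normally expand f32 to f64
//   ...unrelated instructions...
//   stfs f1, 0x20(r4)   ; store single -> would normally narrow f64 to f32
//
// After the optimization both accesses keep the raw 32-bit value (via
// flags2.notExpanded) and a single PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64 is
// inserted after the last store, so denormals survive the round trip and
// redundant conversions are avoided.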
void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg gprReg)
{
	cemu_assert_debug(gprReg.GetBaseFormat() == IMLRegFormat::I64); // todo - proper handling required for non-standard sizes
	cemu_assert_debug(gprReg.GetRegFormat() == IMLRegFormat::I32);

	IMLRegID gprIndex = gprReg.GetRegID();
	IMLInstruction* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad;
	if (imlInstructionLoad->op_storeLoad.flags2.swapEndian == false)
		return;
	bool foundMatch = false;
	IMLUsedRegisters registersUsed;
	sint32 scanRangeEnd = std::min<sint32>(imlIndexLoad + 25, imlSegment->imlList.size()); // don't scan too far (scanning ahead costs performance and the chance to merge the load+store drops with distance)
	sint32 i = imlIndexLoad + 1;
	for (; i < scanRangeEnd; i++)
	{
		IMLInstruction* imlInstruction = imlSegment->imlList.data() + i;
		if (imlInstruction->IsSuffixInstruction())
			break;
		// check if GPR is stored
		if ((imlInstruction->type == PPCREC_IML_TYPE_STORE && imlInstruction->op_storeLoad.copyWidth == 32))
		{
			if (imlInstruction->op_storeLoad.registerMem.GetRegID() == gprIndex)
				break;
			if (imlInstruction->op_storeLoad.registerData.GetRegID() == gprIndex)
			{
				IMLInstruction* imlInstructionStore = imlInstruction;
				if (foundMatch == false)
				{
					// switch the endian swap flag for the load instruction
					imlInstructionLoad->op_storeLoad.flags2.swapEndian = !imlInstructionLoad->op_storeLoad.flags2.swapEndian;
					foundMatch = true;
				}
				// switch the endian swap flag for the store instruction
				imlInstructionStore->op_storeLoad.flags2.swapEndian = !imlInstructionStore->op_storeLoad.flags2.swapEndian;
				// keep scanning
				continue;
			}
		}
		// check if GPR is accessed
		imlInstruction->CheckRegisterUsage(&registersUsed);
		if (registersUsed.readGPR1.IsValidAndSameRegID(gprIndex) ||
			registersUsed.readGPR2.IsValidAndSameRegID(gprIndex) ||
			registersUsed.readGPR3.IsValidAndSameRegID(gprIndex))
		{
			break;
		}
		if (registersUsed.IsBaseGPRWritten(gprReg))
			return; // GPR overwritten, we don't need to byte swap anymore
	}
	if (foundMatch)
	{
		PPCRecompiler_insertInstruction(imlSegment, i)->make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, gprReg, gprReg);
	}
}

/*
 * Scans for patterns:
 * <Load 32bit integer into register r>
 * <Random unrelated instructions>
 * <Store 32bit integer from register r>
 * For these patterns the store and load are modified to work with non-swapped values
 * The big_endian->little_endian conversion is then executed later
 * Advantages:
 * Slightly improves performance
 */
void IMLOptimizer_OptimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext)
{
	for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
	{
		for (sint32 i = 0; i < segIt->imlList.size(); i++)
		{
			IMLInstruction* imlInstruction = segIt->imlList.data() + i;
			if (imlInstruction->type == PPCREC_IML_TYPE_LOAD && imlInstruction->op_storeLoad.copyWidth == 32 && imlInstruction->op_storeLoad.flags2.swapEndian)
			{
				PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
			}
		}
	}
}

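// Illustrative example (not part of the pass, pseudo-IML): a guest sequence like
//   lwz r5, 0(r3)    -> LOAD copyWidth=32, swapEndian=true
//   stw r5, 0(r4)    -> STORE copyWidth=32, swapEndian=true
// has both swap flags cleared so the value is copied in guest byte order as-is;
// an ENDIAN_SWAP is appended at the end of the scanned range only if the register
// is still live there (the scan returns early when the register is overwritten).
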
IMLName PPCRecompilerImlGen_GetRegName(ppcImlGenContext_t* ppcImlGenContext, IMLReg reg);

sint32 _getGQRIndexFromRegister(ppcImlGenContext_t* ppcImlGenContext, IMLReg gqrReg)
{
	if (gqrReg.IsInvalid())
		return -1;
	sint32 namedReg = PPCRecompilerImlGen_GetRegName(ppcImlGenContext, gqrReg);
	if (namedReg >= (PPCREC_NAME_SPR0 + SPR_UGQR0) && namedReg <= (PPCREC_NAME_SPR0 + SPR_UGQR7))
	{
		return namedReg - (PPCREC_NAME_SPR0 + SPR_UGQR0);
	}
	else
	{
		cemu_assert_suspicious();
	}
	return -1;
}

bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, uint32& gqrValue)
{
	// the default configuration is:
	// UGQR0 = 0x00000000
	// UGQR2 = 0x00040004
	// UGQR3 = 0x00050005
	// UGQR4 = 0x00060006
	// UGQR5 = 0x00070007
	// but games are free to modify UGQR2 to UGQR7 it seems.
	// no game modifies UGQR0 so it's safe enough to optimize for the default value
	// Ideally we would do some kind of runtime tracking and second recompilation to create fast paths for PSQ_L/PSQ_ST but that's todo
	if (gqrIndex == 0)
		gqrValue = 0x00000000;
	else
		return false;
	return true;
}

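// Hypothetical usage sketch (emitFastFloatPath and gqrReg are placeholder names,
// not part of this file): a PSQ_L/PSQ_ST emitter could guard a fast path like this:
//   uint32 gqrValue;
//   sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, gqrReg);
//   if (gqrIndex >= 0 && PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, gqrValue) && gqrValue == 0)
//       emitFastFloatPath(); // GQR value 0 selects unquantized floats with scale 0
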
// analyses register dependencies across the entire function
// per segment this will generate information about which registers need to be preserved and which ones don't (e.g. are overwritten)
class IMLOptimizerRegIOAnalysis
{
public:
	// constructor with segment pointer list as span
	IMLOptimizerRegIOAnalysis(std::span<IMLSegment*> segmentList, uint32 maxRegId) : m_segmentList(segmentList), m_maxRegId(maxRegId)
	{
		m_segRegisterInOutList.resize(segmentList.size());
	}

	struct IMLSegmentRegisterInOut
	{
		// todo - since our register ID range is usually pretty small (<64) we could use integer bitmasks to accelerate this? There is a helper class used in RA code already
		std::unordered_set<IMLRegID> regWritten; // registers which are modified in this segment
		std::unordered_set<IMLRegID> regImported; // registers which are read in this segment before they are written (importing value from previous segments)
		std::unordered_set<IMLRegID> regForward; // registers which are not read or written in this segment, but are imported into a later segment (propagated info)
	};

	// calculate which registers are imported (read-before-written) and forwarded (read-before-written by a later segment) per segment
	// then in a second step propagate the dependencies across linked segments
	void ComputeDepedencies()
	{
		std::vector<IMLSegmentRegisterInOut>& segRegisterInOutList = m_segRegisterInOutList;
		IMLSegmentRegisterInOut* segIO = segRegisterInOutList.data();
		uint32 index = 0;
		for(auto& seg : m_segmentList)
		{
			seg->momentaryIndex = index;
			index++;
			for(auto& instr : seg->imlList)
			{
				IMLUsedRegisters registerUsage;
				instr.CheckRegisterUsage(&registerUsage);
				// registers are considered imported if they are read before being written in this seg
				registerUsage.ForEachReadGPR([&](IMLReg gprReg) {
					IMLRegID gprId = gprReg.GetRegID();
					if (!segIO->regWritten.contains(gprId))
					{
						segIO->regImported.insert(gprId);
					}
				});
				registerUsage.ForEachWrittenGPR([&](IMLReg gprReg) {
					IMLRegID gprId = gprReg.GetRegID();
					segIO->regWritten.insert(gprId);
				});
			}
			segIO++;
		}
		// for every exit segment, import all registers
		for(auto& seg : m_segmentList)
		{
			if (!seg->nextSegmentIsUncertain)
				continue;
			if(seg->deadCodeEliminationHintSeg)
				continue;
			IMLSegmentRegisterInOut& segIO = segRegisterInOutList[seg->momentaryIndex];
			for(uint32 i=0; i<=m_maxRegId; i++)
			{
				segIO.regImported.insert((IMLRegID)i);
			}
		}
		// broadcast dependencies across segment chains
		std::unordered_set<uint32> segIdsWhichNeedUpdate;
		for (uint32 i = 0; i < m_segmentList.size(); i++)
		{
			segIdsWhichNeedUpdate.insert(i);
		}
		while(!segIdsWhichNeedUpdate.empty())
		{
			auto firstIt = segIdsWhichNeedUpdate.begin();
			uint32 segId = *firstIt;
			segIdsWhichNeedUpdate.erase(firstIt);
			// propagate regImported and regForward into the regForward of earlier segments, unless the register is written there
			auto& curSeg = m_segmentList[segId];
			IMLSegmentRegisterInOut& curSegIO = segRegisterInOutList[segId];
			for(auto& prevSeg : curSeg->list_prevSegments)
			{
				IMLSegmentRegisterInOut& prevSegIO = segRegisterInOutList[prevSeg->momentaryIndex];
				bool prevSegChanged = false;
				for(auto& regId : curSegIO.regImported)
				{
					if (!prevSegIO.regWritten.contains(regId))
						prevSegChanged |= prevSegIO.regForward.insert(regId).second;
				}
				for(auto& regId : curSegIO.regForward)
				{
					if (!prevSegIO.regWritten.contains(regId))
						prevSegChanged |= prevSegIO.regForward.insert(regId).second;
				}
				if(prevSegChanged)
					segIdsWhichNeedUpdate.insert(prevSeg->momentaryIndex);
			}
			// same for hint links
			for(auto& prevSeg : curSeg->list_deadCodeHintBy)
			{
				IMLSegmentRegisterInOut& prevSegIO = segRegisterInOutList[prevSeg->momentaryIndex];
				bool prevSegChanged = false;
				for(auto& regId : curSegIO.regImported)
				{
					if (!prevSegIO.regWritten.contains(regId))
						prevSegChanged |= prevSegIO.regForward.insert(regId).second;
				}
				for(auto& regId : curSegIO.regForward)
				{
					if (!prevSegIO.regWritten.contains(regId))
						prevSegChanged |= prevSegIO.regForward.insert(regId).second;
				}
				if(prevSegChanged)
					segIdsWhichNeedUpdate.insert(prevSeg->momentaryIndex);
			}
		}
	}

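	// Worked example (illustrative): for segments A -> B -> C where A writes r0,
	// B neither reads nor writes r0, and C reads r0 before writing it, the first
	// step marks r0 as imported in C. The worklist loop then adds r0 to B's
	// regForward (B doesn't write it) and stops at A because A writes r0. The
	// write in A is thereby kept live, while segments before A see no dependency.
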
	std::unordered_set<IMLRegID> GetRegistersNeededAtEndOfSegment(IMLSegment& seg)
	{
		std::unordered_set<IMLRegID> regsNeeded;
		if(seg.nextSegmentIsUncertain)
		{
			if(seg.deadCodeEliminationHintSeg)
			{
				auto& nextSegIO = m_segRegisterInOutList[seg.deadCodeEliminationHintSeg->momentaryIndex];
				regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end());
				regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end());
			}
			else
			{
				// add all regs
				for(uint32 i = 0; i <= m_maxRegId; i++)
					regsNeeded.insert(i);
			}
			return regsNeeded;
		}
		if(seg.nextSegmentBranchTaken)
		{
			auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchTaken->momentaryIndex];
			regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end());
			regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end());
		}
		if(seg.nextSegmentBranchNotTaken)
		{
			auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchNotTaken->momentaryIndex];
			regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end());
			regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end());
		}
		return regsNeeded;
	}

	bool IsRegisterNeededAtEndOfSegment(IMLSegment& seg, IMLRegID regId)
	{
		if(seg.nextSegmentIsUncertain)
		{
			if(!seg.deadCodeEliminationHintSeg)
				return true;
			auto& nextSegIO = m_segRegisterInOutList[seg.deadCodeEliminationHintSeg->momentaryIndex];
			if(nextSegIO.regImported.contains(regId))
				return true;
			if(nextSegIO.regForward.contains(regId))
				return true;
			return false;
		}
		if(seg.nextSegmentBranchTaken)
		{
			auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchTaken->momentaryIndex];
			if(nextSegIO.regImported.contains(regId))
				return true;
			if(nextSegIO.regForward.contains(regId))
				return true;
		}
		if(seg.nextSegmentBranchNotTaken)
		{
			auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchNotTaken->momentaryIndex];
			if(nextSegIO.regImported.contains(regId))
				return true;
			if(nextSegIO.regForward.contains(regId))
				return true;
		}
		return false;
	}

private:
	std::span<IMLSegment*> m_segmentList;
	uint32 m_maxRegId;

	std::vector<IMLSegmentRegisterInOut> m_segRegisterInOutList;
};

// scan backwards starting from index and return the index of the first found instruction which writes to the given register (by id)
sint32 IMLUtil_FindInstructionWhichWritesRegister(IMLSegment& seg, sint32 startIndex, IMLReg reg, sint32 maxScanDistance = -1)
{
	sint32 endIndex = maxScanDistance < 0 ? 0 : std::max<sint32>(startIndex - maxScanDistance, 0); // a negative maxScanDistance means unlimited scan range
	for (sint32 i = startIndex; i >= endIndex; i--)
	{
		IMLInstruction& imlInstruction = seg.imlList[i];
		IMLUsedRegisters registersUsed;
		imlInstruction.CheckRegisterUsage(&registersUsed);
		if (registersUsed.IsBaseGPRWritten(reg))
			return i;
	}
	return -1;
}

// returns true if the instruction can safely be moved while keeping ordering constraints and data dependencies intact
// initialIndex is inclusive, targetIndex is exclusive
bool IMLUtil_CanMoveInstructionTo(IMLSegment& seg, sint32 initialIndex, sint32 targetIndex)
{
	boost::container::static_vector<IMLRegID, 8> regsWritten;
	boost::container::static_vector<IMLRegID, 8> regsRead;
	// get list of read and written registers
	IMLUsedRegisters registersUsed;
	seg.imlList[initialIndex].CheckRegisterUsage(&registersUsed);
	registersUsed.ForEachAccessedGPR([&](IMLReg reg, bool isWritten) {
		if (isWritten)
			regsWritten.push_back(reg.GetRegID());
		else
			regsRead.push_back(reg.GetRegID());
	});
	// check all the instructions in between
	if(initialIndex < targetIndex)
	{
		sint32 scanStartIndex = initialIndex+1; // +1 to skip the moving instruction itself
		sint32 scanEndIndex = targetIndex;
		for (sint32 i = scanStartIndex; i < scanEndIndex; i++)
		{
			IMLUsedRegisters registersUsed;
			seg.imlList[i].CheckRegisterUsage(&registersUsed);
			// in order to be able to move an instruction past another instruction, none of its read registers may be modified (written)
			// and none of its written registers may be read
			bool canMove = true;
			registersUsed.ForEachAccessedGPR([&](IMLReg reg, bool isWritten) {
				IMLRegID regId = reg.GetRegID();
				if (!isWritten)
					canMove = canMove && std::find(regsWritten.begin(), regsWritten.end(), regId) == regsWritten.end();
				else
					canMove = canMove && std::find(regsRead.begin(), regsRead.end(), regId) == regsRead.end();
			});
			if(!canMove)
				return false;
		}
	}
	else
	{
		cemu_assert_unimplemented(); // backwards scan is todo
		return false;
	}
	return true;
}

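// Example (illustrative): given
//   [i+0] r3 = r1 + r2
//   [i+1] r2 = 0
// instruction [i+0] cannot be moved past [i+1] because one of its read registers
// (r2) is written in between; the lambda above clears canMove in that case.
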
sint32 IMLUtil_CountRegisterReadsInRange(IMLSegment& seg, sint32 scanStartIndex, sint32 scanEndIndex, IMLRegID regId)
{
	cemu_assert_debug(scanStartIndex <= scanEndIndex);
	cemu_assert_debug(scanEndIndex < seg.imlList.size());
	sint32 count = 0;
	for (sint32 i = scanStartIndex; i <= scanEndIndex; i++)
	{
		IMLUsedRegisters registersUsed;
		seg.imlList[i].CheckRegisterUsage(&registersUsed);
		registersUsed.ForEachReadGPR([&](IMLReg reg) {
			if (reg.GetRegID() == regId)
				count++;
		});
	}
	return count;
}

// move instruction from one index to another
// instruction will be inserted before the instruction at targetIndex
// returns the new instruction index of the moved instruction
sint32 IMLUtil_MoveInstructionTo(IMLSegment& seg, sint32 initialIndex, sint32 targetIndex)
{
	cemu_assert_debug(initialIndex != targetIndex);
	IMLInstruction temp = seg.imlList[initialIndex];
	if (initialIndex < targetIndex)
	{
		cemu_assert_debug(targetIndex > 0);
		targetIndex--;
		for(size_t i=initialIndex; i<targetIndex; i++)
			seg.imlList[i] = seg.imlList[i+1];
		seg.imlList[targetIndex] = temp;
		return targetIndex;
	}
	else
	{
		cemu_assert_unimplemented(); // testing needed
		std::copy(seg.imlList.begin() + targetIndex, seg.imlList.begin() + initialIndex, seg.imlList.begin() + targetIndex + 1);
		seg.imlList[targetIndex] = temp;
		return targetIndex;
	}
}

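// Example (illustrative): moving index 2 to targetIndex 5 shifts the instructions
// at indices 3..4 down by one and places the moved instruction at index 4, i.e.
// directly before the instruction that previously sat at index 5 (targetIndex is
// exclusive, hence the internal targetIndex-- for forward moves).
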
// x86 specific
bool IMLOptimizerX86_ModifiesEFlags(IMLInstruction& inst)
{
	// this is a very conservative implementation. There are more cases but this is good enough for now
	if(inst.type == PPCREC_IML_TYPE_NAME_R || inst.type == PPCREC_IML_TYPE_R_NAME)
		return false;
	if((inst.type == PPCREC_IML_TYPE_R_R || inst.type == PPCREC_IML_TYPE_R_S32) && inst.operation == PPCREC_IML_OP_ASSIGN)
		return false;
	return true; // if we don't know for sure, assume it does
}

void IMLOptimizer_DebugPrintSeg(ppcImlGenContext_t& ppcImlGenContext, IMLSegment& seg)
{
	printf("----------------\n");
	IMLDebug_DumpSegment(&ppcImlGenContext, &seg);
	fflush(stdout);
}

void IMLOptimizer_RemoveDeadCodeFromSegment(IMLOptimizerRegIOAnalysis& regIoAnalysis, IMLSegment& seg)
{
	// algorithm works like this:
	// Calculate which registers need to be preserved at the end of each segment
	// Then for each segment:
	// - Iterate instructions backwards
	// - Maintain a list of registers which are read at a later point (initially this is the list from the first step)
	// - If an instruction only modifies registers which are not in the read list and has no side effects, then it is dead code and can be replaced with a no-op

	std::unordered_set<IMLRegID> regsNeeded = regIoAnalysis.GetRegistersNeededAtEndOfSegment(seg);

	// start with suffix instruction
	if(seg.HasSuffixInstruction())
	{
		IMLInstruction& imlInstruction = seg.imlList[seg.GetSuffixInstructionIndex()];
		IMLUsedRegisters registersUsed;
		imlInstruction.CheckRegisterUsage(&registersUsed);
		registersUsed.ForEachWrittenGPR([&](IMLReg reg) {
			regsNeeded.erase(reg.GetRegID());
		});
		registersUsed.ForEachReadGPR([&](IMLReg reg) {
			regsNeeded.insert(reg.GetRegID());
		});
	}
	// iterate instructions backwards
	for (sint32 i = seg.imlList.size() - (seg.HasSuffixInstruction() ? 2:1); i >= 0; i--)
	{
		IMLInstruction& imlInstruction = seg.imlList[i];
		IMLUsedRegisters registersUsed;
		imlInstruction.CheckRegisterUsage(&registersUsed);

		// check if this instruction only writes registers which will never be read
		bool onlyWritesRedundantRegisters = true;
		registersUsed.ForEachWrittenGPR([&](IMLReg reg) {
			if (regsNeeded.contains(reg.GetRegID()))
				onlyWritesRedundantRegisters = false;
		});
		// update the liveness set: written registers are dead above this point, read registers become needed
		registersUsed.ForEachWrittenGPR([&](IMLReg reg) {
			regsNeeded.erase(reg.GetRegID());
		});
		registersUsed.ForEachReadGPR([&](IMLReg reg) {
			regsNeeded.insert(reg.GetRegID());
		});
		if(!imlInstruction.HasSideEffects() && onlyWritesRedundantRegisters)
		{
			imlInstruction.make_no_op();
		}
	}
}

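// Example (illustrative pseudo-IML): assuming r4 is not in regsNeeded afterwards,
//   r4 = r5 + r6     <- writes only r4, no side effects -> replaced with no-op
//   store(r3, r5)    <- HasSideEffects() is true -> always kept
// The backwards scan turns the first instruction into a no-op while the store
// survives regardless of register liveness.
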
void IMLOptimizerX86_SubstituteCJumpForEflagsJump(IMLOptimizerRegIOAnalysis& regIoAnalysis, IMLSegment& seg)
{
	// convert and optimize bool condition jumps to eflags condition jumps
	// - Moves eflag setter (e.g. cmp) closer to eflags consumer (conditional jump) if necessary. If not possible but required then exit early
	// - Since we only rely on eflags, the boolean register can be optimized out if DCE considers it unused
	// - Further detect and optimize patterns like DEC + CMP + JCC into fused ops (todo)

	// check if this segment ends with a conditional jump
	if(!seg.HasSuffixInstruction())
		return;
	sint32 cjmpInstIndex = seg.GetSuffixInstructionIndex();
	if(cjmpInstIndex < 0)
		return;
	IMLInstruction& cjumpInstr = seg.imlList[cjmpInstIndex];
	if(cjumpInstr.type != PPCREC_IML_TYPE_CONDITIONAL_JUMP)
		return;
	IMLReg regCondBool = cjumpInstr.op_conditional_jump.registerBool;
	bool invertedCondition = !cjumpInstr.op_conditional_jump.mustBeTrue;
	// find the instruction which sets the bool
	sint32 cmpInstrIndex = IMLUtil_FindInstructionWhichWritesRegister(seg, cjmpInstIndex-1, regCondBool, 20);
	if(cmpInstrIndex < 0)
		return;
	// check if it's an instruction combo which can be optimized (currently only cmp + cjump) and get the condition
	IMLInstruction& condSetterInstr = seg.imlList[cmpInstrIndex];
	IMLCondition cond;
	if(condSetterInstr.type == PPCREC_IML_TYPE_COMPARE)
		cond = condSetterInstr.op_compare.cond;
	else if(condSetterInstr.type == PPCREC_IML_TYPE_COMPARE_S32)
		cond = condSetterInstr.op_compare_s32.cond;
	else
		return;
	// check if instructions in between modify eflags
	sint32 indexEflagsSafeStart = -1; // index of the first instruction which does not modify eflags up to cjump
	for(sint32 i = cjmpInstIndex-1; i > cmpInstrIndex; i--)
	{
		if(IMLOptimizerX86_ModifiesEFlags(seg.imlList[i]))
		{
			indexEflagsSafeStart = i+1;
			break;
		}
	}
	if(indexEflagsSafeStart >= 0)
	{
		cemu_assert(indexEflagsSafeStart > 0);
		// there are eflags-modifying instructions in between the bool setter and cjump
		// try to move the eflags setter close enough to the cjump (to indexEflagsSafeStart)
		bool canMove = IMLUtil_CanMoveInstructionTo(seg, cmpInstrIndex, indexEflagsSafeStart);
		if(!canMove)
		{
			return;
		}
		else
		{
			cmpInstrIndex = IMLUtil_MoveInstructionTo(seg, cmpInstrIndex, indexEflagsSafeStart);
		}
	}
	// we can turn the jump into an eflags jump
	cjumpInstr.make_x86_eflags_jcc(cond, invertedCondition);

	if (IMLUtil_CountRegisterReadsInRange(seg, cmpInstrIndex, cjmpInstIndex, regCondBool.GetRegID()) > 1 || regIoAnalysis.IsRegisterNeededAtEndOfSegment(seg, regCondBool.GetRegID()))
		return; // bool register is used beyond the CMP, we can't drop it

	auto& cmpInstr = seg.imlList[cmpInstrIndex];
	cemu_assert_debug(cmpInstr.type == PPCREC_IML_TYPE_COMPARE || cmpInstr.type == PPCREC_IML_TYPE_COMPARE_S32);
	if(cmpInstr.type == PPCREC_IML_TYPE_COMPARE)
	{
		IMLReg regA = cmpInstr.op_compare.regA;
		IMLReg regB = cmpInstr.op_compare.regB;
		seg.imlList[cmpInstrIndex].make_r_r(PPCREC_IML_OP_X86_CMP, regA, regB);
	}
	else
	{
		IMLReg regA = cmpInstr.op_compare_s32.regA;
		sint32 val = cmpInstr.op_compare_s32.immS32;
		seg.imlList[cmpInstrIndex].make_r_s32(PPCREC_IML_OP_X86_CMP, regA, val);
	}
}

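// Example (illustrative pseudo-IML) of the substitution:
//   before: b0 = compare(r3, ==, r4); ...; conditional_jump(b0)
//   after:  ...; X86_CMP r3, r4; x86_eflags_jcc(EQ)
// The compare is moved next to the jump if eflags would be clobbered in between,
// and it is rewritten to a plain X86_CMP (dropping the b0 write) only when the
// checks above confirm that b0 has no remaining readers.
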
void IMLOptimizer_StandardOptimizationPassForSegment(IMLOptimizerRegIOAnalysis& regIoAnalysis, IMLSegment& seg)
{
	IMLOptimizer_RemoveDeadCodeFromSegment(regIoAnalysis, seg);

#ifdef ARCH_X86_64
	// x86 specific optimizations
	IMLOptimizerX86_SubstituteCJumpForEflagsJump(regIoAnalysis, seg); // this pass should be applied late since it creates invisible eflags dependencies (which would break further register dependency analysis)
#endif
}

void IMLOptimizer_StandardOptimizationPass(ppcImlGenContext_t& ppcImlGenContext)
{
	IMLOptimizerRegIOAnalysis regIoAnalysis(ppcImlGenContext.segmentList2, ppcImlGenContext.GetMaxRegId());
	regIoAnalysis.ComputeDepedencies();
	for (IMLSegment* segIt : ppcImlGenContext.segmentList2)
	{
		IMLOptimizer_StandardOptimizationPassForSegment(regIoAnalysis, *segIt);
	}
}
2204
src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp
Normal file
File diff suppressed because it is too large
125
src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h
Normal file
@ -0,0 +1,125 @@
#pragma once

// container for storing a set of register indices
// specifically optimized towards storing typical range of physical register indices (expected to be below 64)
class IMLPhysRegisterSet
{
public:
	void SetAvailable(uint32 index)
	{
		cemu_assert_debug(index < 64);
		m_regBitmask |= ((uint64)1 << index);
	}

	void SetReserved(uint32 index)
	{
		cemu_assert_debug(index < 64);
		m_regBitmask &= ~((uint64)1 << index);
	}

	void SetAllAvailable()
	{
		m_regBitmask = ~0ull;
	}

	bool HasAllAvailable() const
	{
		return m_regBitmask == ~0ull;
	}

	bool IsAvailable(uint32 index) const
	{
		return (m_regBitmask & ((uint64)1 << index)) != 0;
	}

	IMLPhysRegisterSet& operator&=(const IMLPhysRegisterSet& other)
	{
		this->m_regBitmask &= other.m_regBitmask;
		return *this;
	}

	IMLPhysRegisterSet& operator=(const IMLPhysRegisterSet& other)
	{
		this->m_regBitmask = other.m_regBitmask;
		return *this;
	}

	void RemoveRegisters(const IMLPhysRegisterSet& other)
	{
		this->m_regBitmask &= ~other.m_regBitmask;
	}

	bool HasAnyAvailable() const
	{
		return m_regBitmask != 0;
	}

	bool HasExactlyOneAvailable() const
	{
		return m_regBitmask != 0 && (m_regBitmask & (m_regBitmask - 1)) == 0;
	}

	// returns index of first available register. Do not call when HasAnyAvailable() == false
	IMLPhysReg GetFirstAvailableReg()
	{
		cemu_assert_debug(m_regBitmask != 0);
		sint32 regIndex = 0;
		auto tmp = m_regBitmask;
		while ((tmp & 0xFF) == 0)
		{
			regIndex += 8;
			tmp >>= 8;
		}
		while ((tmp & 0x1) == 0)
		{
			regIndex++;
			tmp >>= 1;
		}
		return regIndex;
	}

	// returns index of next available register (search includes any register index >= startIndex)
	// returns -1 if there is no more register
	IMLPhysReg GetNextAvailableReg(sint32 startIndex) const
	{
		if (startIndex >= 64)
			return -1;
		uint32 regIndex = startIndex;
		auto tmp = m_regBitmask;
		tmp >>= regIndex;
		if (!tmp)
			return -1;
		while ((tmp & 0xFF) == 0)
		{
			regIndex += 8;
			tmp >>= 8;
		}
		while ((tmp & 0x1) == 0)
		{
			regIndex++;
			tmp >>= 1;
		}
		return regIndex;
	}

	sint32 CountAvailableRegs() const
	{
		return std::popcount(m_regBitmask);
	}

private:
	uint64 m_regBitmask{ 0 };
};

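// Usage sketch (illustrative):
//   IMLPhysRegisterSet set;
//   set.SetAvailable(3);
//   set.SetAvailable(10);
//   set.GetFirstAvailableReg();  // -> 3
//   set.GetNextAvailableReg(4);  // -> 10
//   set.SetReserved(3);          // clears bit 3, leaving only register 10
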
struct IMLRegisterAllocatorParameters
{
	inline IMLPhysRegisterSet& GetPhysRegPool(IMLRegFormat regFormat)
	{
		return perTypePhysPool[stdx::to_underlying(regFormat)];
	}

	IMLPhysRegisterSet perTypePhysPool[stdx::to_underlying(IMLRegFormat::TYPE_COUNT)];
	std::unordered_map<IMLRegID, IMLName> regIdToName;
};

void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam);
@ -0,0 +1,635 @@
#include "../PPCRecompiler.h"
#include "../PPCRecompilerIml.h"
#include "IMLRegisterAllocatorRanges.h"
#include "util/helpers/MemoryPool.h"

uint32 IMLRA_GetNextIterationIndex();

IMLRegID raLivenessRange::GetVirtualRegister() const
{
	return virtualRegister;
}

sint32 raLivenessRange::GetPhysicalRegister() const
{
	return physicalRegister;
}

IMLName raLivenessRange::GetName() const
{
	return name;
}

void raLivenessRange::SetPhysicalRegister(IMLPhysReg physicalRegister)
{
	this->physicalRegister = physicalRegister;
}

void raLivenessRange::SetPhysicalRegisterForCluster(IMLPhysReg physicalRegister)
{
	auto clusterRanges = GetAllSubrangesInCluster();
	for(auto& range : clusterRanges)
		range->physicalRegister = physicalRegister;
}

boost::container::small_vector<raLivenessRange*, 128> raLivenessRange::GetAllSubrangesInCluster()
{
	uint32 iterationIndex = IMLRA_GetNextIterationIndex();
	boost::container::small_vector<raLivenessRange*, 128> subranges;
	subranges.push_back(this);
	this->lastIterationIndex = iterationIndex;
	size_t i = 0;
	while(i < subranges.size())
	{
		raLivenessRange* cur = subranges[i];
		i++;
		// check successors
		if(cur->subrangeBranchTaken && cur->subrangeBranchTaken->lastIterationIndex != iterationIndex)
		{
			cur->subrangeBranchTaken->lastIterationIndex = iterationIndex;
			subranges.push_back(cur->subrangeBranchTaken);
		}
		if(cur->subrangeBranchNotTaken && cur->subrangeBranchNotTaken->lastIterationIndex != iterationIndex)
		{
			cur->subrangeBranchNotTaken->lastIterationIndex = iterationIndex;
			subranges.push_back(cur->subrangeBranchNotTaken);
		}
		// check predecessors
		for(auto& prev : cur->previousRanges)
		{
			if(prev->lastIterationIndex != iterationIndex)
			{
				prev->lastIterationIndex = iterationIndex;
				subranges.push_back(prev);
			}
		}
	}
	return subranges;
}

void raLivenessRange::GetAllowedRegistersExRecursive(raLivenessRange* range, uint32 iterationIndex, IMLPhysRegisterSet& allowedRegs)
{
	range->lastIterationIndex = iterationIndex;
	for (auto& it : range->list_fixedRegRequirements)
		allowedRegs &= it.allowedReg;
	// check successors
	if (range->subrangeBranchTaken && range->subrangeBranchTaken->lastIterationIndex != iterationIndex)
		GetAllowedRegistersExRecursive(range->subrangeBranchTaken, iterationIndex, allowedRegs);
	if (range->subrangeBranchNotTaken && range->subrangeBranchNotTaken->lastIterationIndex != iterationIndex)
		GetAllowedRegistersExRecursive(range->subrangeBranchNotTaken, iterationIndex, allowedRegs);
	// check predecessors
	for (auto& prev : range->previousRanges)
	{
		if (prev->lastIterationIndex != iterationIndex)
			GetAllowedRegistersExRecursive(prev, iterationIndex, allowedRegs);
	}
}

bool raLivenessRange::GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters)
{
	uint32 iterationIndex = IMLRA_GetNextIterationIndex();
	allowedRegisters.SetAllAvailable();
	GetAllowedRegistersExRecursive(this, iterationIndex, allowedRegisters);
	return !allowedRegisters.HasAllAvailable();
}

IMLPhysRegisterSet raLivenessRange::GetAllowedRegisters(IMLPhysRegisterSet regPool)
{
	IMLPhysRegisterSet fixedRegRequirements = regPool;
	if(interval.ExtendsPreviousSegment() || interval.ExtendsIntoNextSegment())
	{
		auto clusterRanges = GetAllSubrangesInCluster();
		for(auto& subrange : clusterRanges)
		{
			for(auto& fixedRegLoc : subrange->list_fixedRegRequirements)
				fixedRegRequirements &= fixedRegLoc.allowedReg;
		}
		return fixedRegRequirements;
	}
	for(auto& fixedRegLoc : list_fixedRegRequirements)
		fixedRegRequirements &= fixedRegLoc.allowedReg;
	return fixedRegRequirements;
}

void PPCRecRARange_addLink_perVirtualGPR(std::unordered_map<IMLRegID, raLivenessRange*>& root, raLivenessRange* subrange)
{
	IMLRegID regId = subrange->GetVirtualRegister();
	auto it = root.find(regId);
	if (it == root.end())
	{
		// new single element
		root.try_emplace(regId, subrange);
		subrange->link_sameVirtualRegister.prev = nullptr;
		subrange->link_sameVirtualRegister.next = nullptr;
	}
	else
	{
		// insert in first position
		raLivenessRange* priorFirst = it->second;
		subrange->link_sameVirtualRegister.next = priorFirst;
		it->second = subrange;
		subrange->link_sameVirtualRegister.prev = nullptr;
		priorFirst->link_sameVirtualRegister.prev = subrange;
	}
}

void PPCRecRARange_addLink_allSegmentRanges(raLivenessRange** root, raLivenessRange* subrange)
{
	subrange->link_allSegmentRanges.next = *root;
	if (*root)
		(*root)->link_allSegmentRanges.prev = subrange;
	subrange->link_allSegmentRanges.prev = nullptr;
	*root = subrange;
}

void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_map<IMLRegID, raLivenessRange*>& root, raLivenessRange* subrange)
{
#ifdef CEMU_DEBUG_ASSERT
	raLivenessRange* cur = root.find(subrange->GetVirtualRegister())->second;
	bool hasRangeFound = false;
	while(cur)
	{
		if(cur == subrange)
		{
			hasRangeFound = true;
			break;
		}
		cur = cur->link_sameVirtualRegister.next;
	}
	cemu_assert_debug(hasRangeFound);
#endif
	IMLRegID regId = subrange->GetVirtualRegister();
	raLivenessRange* nextRange = subrange->link_sameVirtualRegister.next;
	raLivenessRange* prevRange = subrange->link_sameVirtualRegister.prev;
	if (prevRange)
		prevRange->link_sameVirtualRegister.next = subrange->link_sameVirtualRegister.next;
	if (nextRange)
		nextRange->link_sameVirtualRegister.prev = subrange->link_sameVirtualRegister.prev;

	if (!prevRange)
	{
		if (nextRange)
		{
			root.find(regId)->second = nextRange;
		}
		else
		{
			cemu_assert_debug(root.find(regId)->second == subrange);
			root.erase(regId);
		}
	}
#ifdef CEMU_DEBUG_ASSERT
	subrange->link_sameVirtualRegister.prev = (raLivenessRange*)1;
	subrange->link_sameVirtualRegister.next = (raLivenessRange*)1;
#endif
}

void PPCRecRARange_removeLink_allSegmentRanges(raLivenessRange** root, raLivenessRange* subrange)
{
	raLivenessRange* tempPrev = subrange->link_allSegmentRanges.prev;
	if (subrange->link_allSegmentRanges.prev)
		subrange->link_allSegmentRanges.prev->link_allSegmentRanges.next = subrange->link_allSegmentRanges.next;
	else
		(*root) = subrange->link_allSegmentRanges.next;
	if (subrange->link_allSegmentRanges.next)
		subrange->link_allSegmentRanges.next->link_allSegmentRanges.prev = tempPrev;
#ifdef CEMU_DEBUG_ASSERT
	subrange->link_allSegmentRanges.prev = (raLivenessRange*)1;
	subrange->link_allSegmentRanges.next = (raLivenessRange*)1;
#endif
}

MemoryPoolPermanentObjects<raLivenessRange> memPool_livenessSubrange(4096);

// startPosition and endPosition are inclusive
raLivenessRange* IMLRA_CreateRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition)
{
	raLivenessRange* range = memPool_livenessSubrange.acquireObj();
	range->previousRanges.clear();
	range->list_accessLocations.clear();
	range->list_fixedRegRequirements.clear();
	range->imlSegment = imlSegment;

	cemu_assert_debug(startPosition <= endPosition);
	range->interval.start = startPosition;
	range->interval.end = endPosition;

	// register mapping
	range->virtualRegister = virtualRegister;
	range->name = name;
	range->physicalRegister = -1;
	// default values
	range->hasStore = false;
	range->hasStoreDelayed = false;
	range->lastIterationIndex = 0;
	range->subrangeBranchNotTaken = nullptr;
	range->subrangeBranchTaken = nullptr;
	cemu_assert_debug(range->previousRanges.empty());
	range->_noLoad = false;
	// add to segment linked lists
	PPCRecRARange_addLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualRegister, range);
	PPCRecRARange_addLink_allSegmentRanges(&imlSegment->raInfo.linkedList_allSubranges, range);
	return range;
}

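// Usage sketch (illustrative; ctx/seg/regId/name are placeholders): a range that
// covers a whole segment and connects to both neighbours would be created with
// the special inter-range edges instead of instruction indices:
//   raInstructionEdge start, end;
//   start.SetRaw(RA_INTER_RANGE_START);
//   end.SetRaw(RA_INTER_RANGE_END);
//   IMLRA_CreateRange(ctx, seg, regId, name, start, end);
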
void _unlinkSubrange(raLivenessRange* range)
{
	IMLSegment* imlSegment = range->imlSegment;
	PPCRecRARange_removeLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualRegister, range);
	PPCRecRARange_removeLink_allSegmentRanges(&imlSegment->raInfo.linkedList_allSubranges, range);
	// unlink reverse references
	if(range->subrangeBranchTaken)
		range->subrangeBranchTaken->previousRanges.erase(std::find(range->subrangeBranchTaken->previousRanges.begin(), range->subrangeBranchTaken->previousRanges.end(), range));
	if(range->subrangeBranchNotTaken)
		range->subrangeBranchNotTaken->previousRanges.erase(std::find(range->subrangeBranchNotTaken->previousRanges.begin(), range->subrangeBranchNotTaken->previousRanges.end(), range));
	range->subrangeBranchTaken = (raLivenessRange*)(uintptr_t)-1;
	range->subrangeBranchNotTaken = (raLivenessRange*)(uintptr_t)-1;
	// remove forward references
	for(auto& prev : range->previousRanges)
	{
		if(prev->subrangeBranchTaken == range)
			prev->subrangeBranchTaken = nullptr;
		if(prev->subrangeBranchNotTaken == range)
			prev->subrangeBranchNotTaken = nullptr;
	}
	range->previousRanges.clear();
}

void IMLRA_DeleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* range)
{
	_unlinkSubrange(range);
	range->list_accessLocations.clear();
	range->list_fixedRegRequirements.clear();
	memPool_livenessSubrange.releaseObj(range);
}

void IMLRA_DeleteRangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* range)
{
	auto clusterRanges = range->GetAllSubrangesInCluster();
	for (auto& subrange : clusterRanges)
		IMLRA_DeleteRange(ppcImlGenContext, subrange);
}

void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext)
{
	for(auto& seg : ppcImlGenContext->segmentList2)
	{
		raLivenessRange* cur;
		while((cur = seg->raInfo.linkedList_allSubranges))
			IMLRA_DeleteRange(ppcImlGenContext, cur);
		seg->raInfo.linkedList_allSubranges = nullptr;
		seg->raInfo.linkedList_perVirtualRegister.clear();
	}
}

void IMLRA_MergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange)
{
#ifdef CEMU_DEBUG_ASSERT
	PPCRecRA_debugValidateSubrange(subrange);
	PPCRecRA_debugValidateSubrange(absorbedSubrange);
	if (subrange->imlSegment != absorbedSubrange->imlSegment)
		assert_dbg();
	cemu_assert_debug(subrange->interval.end == absorbedSubrange->interval.start);

	if (subrange->subrangeBranchTaken || subrange->subrangeBranchNotTaken)
		assert_dbg();
	if (subrange == absorbedSubrange)
		assert_dbg();
#endif
	// update references
	subrange->subrangeBranchTaken = absorbedSubrange->subrangeBranchTaken;
	subrange->subrangeBranchNotTaken = absorbedSubrange->subrangeBranchNotTaken;
	absorbedSubrange->subrangeBranchTaken = nullptr;
	absorbedSubrange->subrangeBranchNotTaken = nullptr;
	if(subrange->subrangeBranchTaken)
		*std::find(subrange->subrangeBranchTaken->previousRanges.begin(), subrange->subrangeBranchTaken->previousRanges.end(), absorbedSubrange) = subrange;
	if(subrange->subrangeBranchNotTaken)
		*std::find(subrange->subrangeBranchNotTaken->previousRanges.begin(), subrange->subrangeBranchNotTaken->previousRanges.end(), absorbedSubrange) = subrange;

	// merge usage locations
	for (auto& accessLoc : absorbedSubrange->list_accessLocations)
		subrange->list_accessLocations.push_back(accessLoc);
	absorbedSubrange->list_accessLocations.clear();
	// merge fixed reg locations
#ifdef CEMU_DEBUG_ASSERT
	if(!subrange->list_fixedRegRequirements.empty() && !absorbedSubrange->list_fixedRegRequirements.empty())
	{
		cemu_assert_debug(subrange->list_fixedRegRequirements.back().pos < absorbedSubrange->list_fixedRegRequirements.front().pos);
	}
#endif
	for (auto& fixedReg : absorbedSubrange->list_fixedRegRequirements)
		subrange->list_fixedRegRequirements.push_back(fixedReg);
	absorbedSubrange->list_fixedRegRequirements.clear();

	subrange->interval.end = absorbedSubrange->interval.end;

	PPCRecRA_debugValidateSubrange(subrange);

	IMLRA_DeleteRange(ppcImlGenContext, absorbedSubrange);
}

// remove all inter-segment connections from the range cluster and split it into local ranges. Ranges are trimmed and if they have no access location they will be removed
void IMLRA_ExplodeRangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange)
{
	cemu_assert_debug(originRange->interval.ExtendsPreviousSegment() || originRange->interval.ExtendsIntoNextSegment()); // only call this on ranges that span multiple segments
	auto clusterRanges = originRange->GetAllSubrangesInCluster();
	for (auto& subrange : clusterRanges)
	{
		if (subrange->list_accessLocations.empty())
			continue;
		raInterval interval;
		interval.SetInterval(subrange->list_accessLocations.front().pos, subrange->list_accessLocations.back().pos);
		raLivenessRange* newSubrange = IMLRA_CreateRange(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), interval.start, interval.end);
		// copy locations and fixed reg indices
		newSubrange->list_accessLocations = subrange->list_accessLocations;
		newSubrange->list_fixedRegRequirements = subrange->list_fixedRegRequirements;
		if(originRange->HasPhysicalRegister())
		{
			cemu_assert_debug(subrange->list_fixedRegRequirements.empty()); // avoid unassigning a register from a range with a fixed register requirement
		}
		// validate
		if(!newSubrange->list_accessLocations.empty())
		{
			cemu_assert_debug(newSubrange->list_accessLocations.front().pos >= newSubrange->interval.start);
			cemu_assert_debug(newSubrange->list_accessLocations.back().pos <= newSubrange->interval.end);
		}
		if(!newSubrange->list_fixedRegRequirements.empty())
		{
			cemu_assert_debug(newSubrange->list_fixedRegRequirements.front().pos >= newSubrange->interval.start); // fixed register requirements outside of the actual access range probably means there is a mistake in GetInstructionFixedRegisters()
			cemu_assert_debug(newSubrange->list_fixedRegRequirements.back().pos <= newSubrange->interval.end);
		}
	}
	// delete the original range cluster
	IMLRA_DeleteRangeCluster(ppcImlGenContext, originRange);
}

#ifdef CEMU_DEBUG_ASSERT
void PPCRecRA_debugValidateSubrange(raLivenessRange* range)
{
	// validate subrange
	if (range->subrangeBranchTaken && range->subrangeBranchTaken->imlSegment != range->imlSegment->nextSegmentBranchTaken)
		assert_dbg();
	if (range->subrangeBranchNotTaken && range->subrangeBranchNotTaken->imlSegment != range->imlSegment->nextSegmentBranchNotTaken)
		assert_dbg();

	if(range->subrangeBranchTaken || range->subrangeBranchNotTaken)
	{
		cemu_assert_debug(range->interval.end.ConnectsToNextSegment());
	}
	if(!range->previousRanges.empty())
	{
		cemu_assert_debug(range->interval.start.ConnectsToPreviousSegment());
	}
	// validate locations
	if (!range->list_accessLocations.empty())
	{
		cemu_assert_debug(range->list_accessLocations.front().pos >= range->interval.start);
		cemu_assert_debug(range->list_accessLocations.back().pos <= range->interval.end);
	}
	// validate fixed reg requirements
	if (!range->list_fixedRegRequirements.empty())
	{
		cemu_assert_debug(range->list_fixedRegRequirements.front().pos >= range->interval.start);
		cemu_assert_debug(range->list_fixedRegRequirements.back().pos <= range->interval.end);
		for(sint32 i = 0; i < (sint32)range->list_fixedRegRequirements.size()-1; i++)
			cemu_assert_debug(range->list_fixedRegRequirements[i].pos < range->list_fixedRegRequirements[i+1].pos);
	}
}
#else
void PPCRecRA_debugValidateSubrange(raLivenessRange* range) {}
#endif

// trim start and end of range to match first and last read/write locations
// does not trim start/endpoints which extend into the next/previous segment
void IMLRA_TrimRangeToUse(raLivenessRange* range)
{
	if(range->list_accessLocations.empty())
	{
		// special case where we trim ranges extending from other segments to a single instruction edge
		cemu_assert_debug(!range->interval.start.IsInstructionIndex() || !range->interval.end.IsInstructionIndex());
		if(range->interval.start.IsInstructionIndex())
			range->interval.start = range->interval.end;
		if(range->interval.end.IsInstructionIndex())
			range->interval.end = range->interval.start;
		return;
	}
	// trim start and end
	raInterval prevInterval = range->interval;
	if(range->interval.start.IsInstructionIndex())
		range->interval.start = range->list_accessLocations.front().pos;
	if(range->interval.end.IsInstructionIndex())
		range->interval.end = range->list_accessLocations.back().pos;
	// extra checks
#ifdef CEMU_DEBUG_ASSERT
	cemu_assert_debug(range->interval.start <= range->interval.end);
	for(auto& loc : range->list_accessLocations)
	{
		cemu_assert_debug(range->interval.ContainsEdge(loc.pos));
	}
	cemu_assert_debug(prevInterval.ContainsWholeInterval(range->interval));
#endif
}

// split range at the given position
// After the split there will be two ranges:
// head -> subrange is shortened to end at splitIndex (exclusive)
// tail -> a new subrange that ranges from splitIndex (inclusive) to the end of the original subrange
// if head has a physical register assigned it will not carry over to tail
// The return value is the tail range
// If trimToUsage is true, the end of the head subrange and the start of the tail subrange will be shrunk to fit the read/write locations within. If there are no locations then the range will be deleted
raLivenessRange* IMLRA_SplitRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToUsage)
{
	cemu_assert_debug(splitPosition.IsInstructionIndex());
	cemu_assert_debug(!subrange->interval.IsNextSegmentOnly() && !subrange->interval.IsPreviousSegmentOnly());
	cemu_assert_debug(subrange->interval.ContainsEdge(splitPosition));
	// determine new intervals
	raInterval headInterval, tailInterval;
	headInterval.SetInterval(subrange->interval.start, splitPosition-1);
	tailInterval.SetInterval(splitPosition, subrange->interval.end);
	cemu_assert_debug(headInterval.start <= headInterval.end);
	cemu_assert_debug(tailInterval.start <= tailInterval.end);
	// create tail
	raLivenessRange* tailSubrange = IMLRA_CreateRange(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), tailInterval.start, tailInterval.end);
	tailSubrange->SetPhysicalRegister(subrange->GetPhysicalRegister());
	// carry over branch targets and update reverse references
	tailSubrange->subrangeBranchTaken = subrange->subrangeBranchTaken;
	tailSubrange->subrangeBranchNotTaken = subrange->subrangeBranchNotTaken;
	subrange->subrangeBranchTaken = nullptr;
	subrange->subrangeBranchNotTaken = nullptr;
	if(tailSubrange->subrangeBranchTaken)
		*std::find(tailSubrange->subrangeBranchTaken->previousRanges.begin(), tailSubrange->subrangeBranchTaken->previousRanges.end(), subrange) = tailSubrange;
	if(tailSubrange->subrangeBranchNotTaken)
		*std::find(tailSubrange->subrangeBranchNotTaken->previousRanges.begin(), tailSubrange->subrangeBranchNotTaken->previousRanges.end(), subrange) = tailSubrange;
	// we assume that list_locations is ordered by instruction index and contains no duplicate indices, so let's check that here just in case
#ifdef CEMU_DEBUG_ASSERT
	if(subrange->list_accessLocations.size() > 1)
	{
		for(size_t i=0; i<subrange->list_accessLocations.size()-1; i++)
		{
			cemu_assert_debug(subrange->list_accessLocations[i].pos < subrange->list_accessLocations[i+1].pos);
		}
	}
#endif
	// split locations
	auto it = std::lower_bound(
		subrange->list_accessLocations.begin(), subrange->list_accessLocations.end(), splitPosition,
		[](const raAccessLocation& accessLoc, raInstructionEdge value) { return accessLoc.pos < value; }
	);
	size_t originalCount = subrange->list_accessLocations.size();
	tailSubrange->list_accessLocations.insert(tailSubrange->list_accessLocations.end(), it, subrange->list_accessLocations.end());
	subrange->list_accessLocations.erase(it, subrange->list_accessLocations.end());
	cemu_assert_debug(subrange->list_accessLocations.empty() || subrange->list_accessLocations.back().pos < splitPosition);
	cemu_assert_debug(tailSubrange->list_accessLocations.empty() || tailSubrange->list_accessLocations.front().pos >= splitPosition);
	cemu_assert_debug(subrange->list_accessLocations.size() + tailSubrange->list_accessLocations.size() == originalCount);
	// split fixed reg requirements
	for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++)
	{
		raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i;
		if (tailInterval.ContainsEdge(fixedReg->pos))
		{
			tailSubrange->list_fixedRegRequirements.push_back(*fixedReg);
		}
	}
	// remove tail fixed reg requirements from head
	for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++)
	{
		raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i;
		if (!headInterval.ContainsEdge(fixedReg->pos))
		{
			subrange->list_fixedRegRequirements.resize(i);
			break;
		}
	}
	// adjust intervals
	subrange->interval = headInterval;
	tailSubrange->interval = tailInterval;
	// trim to usage
	if(trimToUsage)
	{
		if(subrange->list_accessLocations.empty() && (subrange->interval.start.IsInstructionIndex() && subrange->interval.end.IsInstructionIndex()))
		{
			IMLRA_DeleteRange(ppcImlGenContext, subrange);
			subrange = nullptr;
		}
		else
		{
			IMLRA_TrimRangeToUse(subrange);
		}
		if(tailSubrange->list_accessLocations.empty() && (tailSubrange->interval.start.IsInstructionIndex() && tailSubrange->interval.end.IsInstructionIndex()))
		{
			IMLRA_DeleteRange(ppcImlGenContext, tailSubrange);
			tailSubrange = nullptr;
		}
		else
		{
			IMLRA_TrimRangeToUse(tailSubrange);
		}
	}
	// validation
	cemu_assert_debug(!subrange || subrange->interval.start <= subrange->interval.end);
	cemu_assert_debug(!tailSubrange || tailSubrange->interval.start <= tailSubrange->interval.end);
	cemu_assert_debug(!tailSubrange || tailSubrange->interval.start >= splitPosition);
	if (!trimToUsage)
		cemu_assert_debug(!tailSubrange || tailSubrange->interval.start == splitPosition);

	if(subrange)
		PPCRecRA_debugValidateSubrange(subrange);
	if(tailSubrange)
		PPCRecRA_debugValidateSubrange(tailSubrange);
	return tailSubrange;
}

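// Example (illustrative): splitting a range spanning instructions 2..8 at
// splitPosition 5 yields head = [2, 5) and tail = [5, 8]. An access location at
// position 5 moves to the tail (the lower_bound above is inclusive on the tail
// side); with trimToUsage the tail start may shrink further, e.g. to a first
// actual access at 6.
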
sint32 IMLRA_GetSegmentReadWriteCost(IMLSegment* imlSegment)
{
	sint32 v = imlSegment->loopDepth + 1;
	v *= 5;
	return v*v; // 25, 100, 225, 400
}

// calculate additional cost of range that it would have after calling _ExplodeRange() on it
sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange)
{
	auto ranges = subrange->GetAllSubrangesInCluster();
	sint32 cost = 0;//-PPCRecRARange_estimateTotalCost(ranges);
	for (auto& subrange : ranges)
	{
		if (subrange->list_accessLocations.empty())
			continue; // this range would be deleted and thus has no cost
		sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment);
		bool hasAdditionalLoad = subrange->interval.ExtendsPreviousSegment();
		bool hasAdditionalStore = subrange->interval.ExtendsIntoNextSegment();
		if(hasAdditionalLoad && subrange->list_accessLocations.front().IsWrite()) // if written before read then a load isn't necessary
		{
			cemu_assert_debug(!subrange->list_accessLocations.front().IsRead());
			cost += segmentLoadStoreCost;
		}
		if(hasAdditionalStore)
		{
			bool hasWrite = std::find_if(subrange->list_accessLocations.begin(), subrange->list_accessLocations.end(), [](const raAccessLocation& loc) { return loc.IsWrite(); }) != subrange->list_accessLocations.end();
			if(!hasWrite) // ranges which don't modify their value do not need to be stored
				cost += segmentLoadStoreCost;
		}
	}
	// todo - properly calculating all the data-flow dependency based costs is more complex so this currently is an approximation
	return cost;
}

sint32 IMLRA_CalculateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition)
|
||||
{
|
||||
// validation
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
if (subrange->interval.ExtendsIntoNextSegment())
|
||||
assert_dbg();
|
||||
#endif
|
||||
cemu_assert_debug(splitPosition.IsInstructionIndex());
|
||||
|
||||
sint32 cost = 0;
|
||||
// find split position in location list
|
||||
if (subrange->list_accessLocations.empty())
|
||||
return 0;
|
||||
if (splitPosition <= subrange->list_accessLocations.front().pos)
|
||||
return 0;
|
||||
if (splitPosition > subrange->list_accessLocations.back().pos)
|
||||
return 0;
|
||||
|
||||
size_t firstTailLocationIndex = 0;
|
||||
for (size_t i = 0; i < subrange->list_accessLocations.size(); i++)
|
||||
{
|
||||
if (subrange->list_accessLocations[i].pos >= splitPosition)
|
||||
{
|
||||
firstTailLocationIndex = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
std::span<raAccessLocation> headLocations{subrange->list_accessLocations.data(), firstTailLocationIndex};
|
||||
std::span<raAccessLocation> tailLocations{subrange->list_accessLocations.data() + firstTailLocationIndex, subrange->list_accessLocations.size() - firstTailLocationIndex};
|
||||
cemu_assert_debug(headLocations.empty() || headLocations.back().pos < splitPosition);
|
||||
cemu_assert_debug(tailLocations.empty() || tailLocations.front().pos >= splitPosition);
|
||||
|
||||
sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment);
|
||||
|
||||
auto CalculateCostFromLocationRange = [segmentLoadStoreCost](std::span<raAccessLocation> locations, bool trackLoadCost = true, bool trackStoreCost = true) -> sint32
|
||||
{
|
||||
if(locations.empty())
|
||||
return 0;
|
||||
sint32 cost = 0;
|
||||
if(locations.front().IsRead() && trackLoadCost)
|
||||
cost += segmentLoadStoreCost; // not overwritten, so there is a load cost
|
||||
bool hasWrite = std::find_if(locations.begin(), locations.end(), [](const raAccessLocation& loc) { return loc.IsWrite(); }) != locations.end();
|
||||
if(hasWrite && trackStoreCost)
|
||||
cost += segmentLoadStoreCost; // modified, so there is a store cost
|
||||
return cost;
|
||||
};
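
	// example: for the access pattern {read@2, write@5, read@9} split at edge 6, the head
	// {read@2, write@5} costs a load plus a store and the tail {read@9} costs a load, while
	// the unsplit range costs a load plus a store - so the split adds one extra
	// segmentLoadStoreCost (the reload in the tail)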
	sint32 baseCost = CalculateCostFromLocationRange(subrange->list_accessLocations);

	bool tailOverwritesValue = !tailLocations.empty() && !tailLocations.front().IsRead() && tailLocations.front().IsWrite();

	sint32 newCost = CalculateCostFromLocationRange(headLocations) + CalculateCostFromLocationRange(tailLocations, !tailOverwritesValue, true);
	cemu_assert_debug(newCost >= baseCost);
	cost = newCost - baseCost;

	return cost;
}

364
src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h
Normal file
@@ -0,0 +1,364 @@
#pragma once
#include "IMLRegisterAllocator.h"

struct raLivenessSubrangeLink
{
	struct raLivenessRange* prev;
	struct raLivenessRange* next;
};

struct raInstructionEdge
{
	friend struct raInterval;
public:
	raInstructionEdge()
	{
		index = 0;
	}

	raInstructionEdge(sint32 instructionIndex, bool isInputEdge)
	{
		Set(instructionIndex, isInputEdge);
	}

	void Set(sint32 instructionIndex, bool isInputEdge)
	{
		if(instructionIndex == RA_INTER_RANGE_START || instructionIndex == RA_INTER_RANGE_END)
		{
			index = instructionIndex;
			return;
		}
		index = instructionIndex * 2 + (isInputEdge ? 0 : 1);
		cemu_assert_debug(index >= 0 && index < 0x100000*2); // make sure index value is sane
	}
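
	// e.g. instruction 5: input edge -> raw index 10, output edge -> raw index 11,
	// so every read of an instruction is ordered before its writes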
	void SetRaw(sint32 index)
	{
		this->index = index;
		cemu_assert_debug(index == RA_INTER_RANGE_START || index == RA_INTER_RANGE_END || (index >= 0 && index < 0x100000*2)); // make sure index value is sane
	}

	// sint32 GetRaw()
	// {
	//	this->index = index;
	// }

	std::string GetDebugString()
	{
		if(index == RA_INTER_RANGE_START)
			return "RA_START";
		else if(index == RA_INTER_RANGE_END)
			return "RA_END";
		std::string str = fmt::format("{}", GetInstructionIndex());
		if(IsOnInputEdge())
			str += "i";
		else if(IsOnOutputEdge())
			str += "o";
		return str;
	}

	sint32 GetInstructionIndex() const
	{
		cemu_assert_debug(index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END);
		return index >> 1;
	}

	// returns instruction index or RA_INTER_RANGE_START/RA_INTER_RANGE_END
	sint32 GetInstructionIndexEx() const
	{
		if(index == RA_INTER_RANGE_START || index == RA_INTER_RANGE_END)
			return index;
		return index >> 1;
	}

	sint32 GetRaw() const
	{
		return index;
	}

	bool IsOnInputEdge() const
	{
		cemu_assert_debug(index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END);
		return (index&1) == 0;
	}

	bool IsOnOutputEdge() const
	{
		cemu_assert_debug(index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END);
		return (index&1) != 0;
	}

	bool ConnectsToPreviousSegment() const
	{
		return index == RA_INTER_RANGE_START;
	}

	bool ConnectsToNextSegment() const
	{
		return index == RA_INTER_RANGE_END;
	}

	bool IsInstructionIndex() const
	{
		return index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END;
	}

	// comparison operators
	bool operator>(const raInstructionEdge& other) const
	{
		return index > other.index;
	}
	bool operator<(const raInstructionEdge& other) const
	{
		return index < other.index;
	}
	bool operator<=(const raInstructionEdge& other) const
	{
		return index <= other.index;
	}
	bool operator>=(const raInstructionEdge& other) const
	{
		return index >= other.index;
	}
	bool operator==(const raInstructionEdge& other) const
	{
		return index == other.index;
	}

	raInstructionEdge operator+(sint32 offset) const
	{
		cemu_assert_debug(IsInstructionIndex());
		cemu_assert_debug(offset >= 0 && offset < RA_INTER_RANGE_END);
		raInstructionEdge edge;
		edge.index = index + offset;
		return edge;
	}

	raInstructionEdge operator-(sint32 offset) const
	{
		cemu_assert_debug(IsInstructionIndex());
		cemu_assert_debug(offset >= 0 && offset < RA_INTER_RANGE_END);
		raInstructionEdge edge;
		edge.index = index - offset;
		return edge;
	}

	raInstructionEdge& operator++()
	{
		cemu_assert_debug(IsInstructionIndex());
		index++;
		return *this;
	}

private:
	sint32 index; // can also be RA_INTER_RANGE_START or RA_INTER_RANGE_END, otherwise contains instruction index * 2

};

struct raAccessLocation
{
	raAccessLocation(raInstructionEdge pos) : pos(pos) {}

	bool IsRead() const
	{
		return pos.IsOnInputEdge();
	}

	bool IsWrite() const
	{
		return pos.IsOnOutputEdge();
	}

	raInstructionEdge pos;
};

struct raInterval
{
	raInterval()
	{

	}

	raInterval(raInstructionEdge start, raInstructionEdge end)
	{
		SetInterval(start, end);
	}

	// isStartOnInput = Input+Output edge on first instruction. If false then only output
	// isEndOnOutput = Input+Output edge on last instruction. If false then only input
	void SetInterval(sint32 start, bool isStartOnInput, sint32 end, bool isEndOnOutput)
	{
		this->start.Set(start, isStartOnInput);
		this->end.Set(end, !isEndOnOutput);
	}
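
	// e.g. SetInterval(3, true, 7, false) spans from the input edge of instruction 3
	// (raw index 6) to the input edge of instruction 7 (raw index 14); the value is
	// read at instruction 7 but not preserved beyond it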
	void SetInterval(raInstructionEdge start, raInstructionEdge end)
	{
		cemu_assert_debug(start <= end);
		this->start = start;
		this->end = end;
	}

	void SetStart(const raInstructionEdge& edge)
	{
		start = edge;
	}

	void SetEnd(const raInstructionEdge& edge)
	{
		end = edge;
	}

	sint32 GetStartIndex() const
	{
		return start.GetInstructionIndex();
	}

	sint32 GetEndIndex() const
	{
		return end.GetInstructionIndex();
	}

	bool ExtendsPreviousSegment() const
	{
		return start.ConnectsToPreviousSegment();
	}

	bool ExtendsIntoNextSegment() const
	{
		return end.ConnectsToNextSegment();
	}

	bool IsNextSegmentOnly() const
	{
		return start.ConnectsToNextSegment() && end.ConnectsToNextSegment();
	}

	bool IsPreviousSegmentOnly() const
	{
		return start.ConnectsToPreviousSegment() && end.ConnectsToPreviousSegment();
	}

	// returns true if range is contained within a single segment
	bool IsLocal() const
	{
		return start.GetRaw() > RA_INTER_RANGE_START && end.GetRaw() < RA_INTER_RANGE_END;
	}

	bool ContainsInstructionIndex(sint32 instructionIndex) const
	{
		cemu_assert_debug(instructionIndex != RA_INTER_RANGE_START && instructionIndex != RA_INTER_RANGE_END);
		return instructionIndex >= start.GetInstructionIndexEx() && instructionIndex <= end.GetInstructionIndexEx();
	}

	// similar to ContainsInstructionIndex, but allows RA_INTER_RANGE_START/END as input
	bool ContainsInstructionIndexEx(sint32 instructionIndex) const
	{
		if(instructionIndex == RA_INTER_RANGE_START)
			return start.ConnectsToPreviousSegment();
		if(instructionIndex == RA_INTER_RANGE_END)
			return end.ConnectsToNextSegment();
		return instructionIndex >= start.GetInstructionIndexEx() && instructionIndex <= end.GetInstructionIndexEx();
	}

	bool ContainsEdge(const raInstructionEdge& edge) const
	{
		return edge >= start && edge <= end;
	}

	bool ContainsWholeInterval(const raInterval& other) const
	{
		return other.start >= start && other.end <= end;
	}

	bool IsOverlapping(const raInterval& other) const
	{
		return start <= other.end && end >= other.start;
	}

	sint32 GetPreciseDistance()
	{
		cemu_assert_debug(!start.ConnectsToNextSegment()); // how to handle this?
		if(start == end)
			return 1;
		cemu_assert_debug(!end.ConnectsToPreviousSegment() && !end.ConnectsToNextSegment());
		if(start.ConnectsToPreviousSegment())
			return end.GetRaw() + 1;

		return end.GetRaw() - start.GetRaw() + 1; // +1 because end is inclusive
	}

	// private: intentionally not used here - hiding these fields would only force us to create loads of verbose getters and setters
	raInstructionEdge start;
	raInstructionEdge end;
};

struct raFixedRegRequirement
{
	raInstructionEdge pos;
	IMLPhysRegisterSet allowedReg;
};

struct raLivenessRange
{
	IMLSegment* imlSegment;
	raInterval interval;

	// dirty state tracking
	bool _noLoad;
	bool hasStore;
	bool hasStoreDelayed;
	// next
	raLivenessRange* subrangeBranchTaken;
	raLivenessRange* subrangeBranchNotTaken;
	// reverse counterpart of BranchTaken/BranchNotTaken
	boost::container::small_vector<raLivenessRange*, 4> previousRanges;
	// processing
	uint32 lastIterationIndex;
	// instruction read/write locations
	std::vector<raAccessLocation> list_accessLocations;
	// ordered list of all raInstructionEdge indices which require a fixed register
	std::vector<raFixedRegRequirement> list_fixedRegRequirements;
	// linked list (subranges with same GPR virtual register)
	raLivenessSubrangeLink link_sameVirtualRegister;
	// linked list (all subranges for this segment)
	raLivenessSubrangeLink link_allSegmentRanges;
	// register info
	IMLRegID virtualRegister;
	IMLName name;
	// register allocator result
	IMLPhysReg physicalRegister;

	boost::container::small_vector<raLivenessRange*, 128> GetAllSubrangesInCluster();
	bool GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters); // if the cluster has fixed register requirements in any instruction this returns the combined register mask. Otherwise returns false, in which case allowedRegisters is left undefined
	IMLPhysRegisterSet GetAllowedRegisters(IMLPhysRegisterSet regPool); // return regPool with fixed register requirements filtered out

	IMLRegID GetVirtualRegister() const;
	sint32 GetPhysicalRegister() const;
	bool HasPhysicalRegister() const { return physicalRegister >= 0; }
	IMLName GetName() const;
	void SetPhysicalRegister(IMLPhysReg physicalRegister);
	void SetPhysicalRegisterForCluster(IMLPhysReg physicalRegister);
	void UnsetPhysicalRegister() { physicalRegister = -1; }

private:
	void GetAllowedRegistersExRecursive(raLivenessRange* range, uint32 iterationIndex, IMLPhysRegisterSet& allowedRegs);
};
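
// a cluster appears to be the set of ranges connected through subrangeBranchTaken/
// subrangeBranchNotTaken and previousRanges, i.e. one virtual register tracked across
// segment boundaries; GetAllSubrangesInCluster() walks these links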
raLivenessRange* IMLRA_CreateRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition);
void IMLRA_DeleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange);
void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext);

void IMLRA_ExplodeRangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange);

void IMLRA_MergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange);

raLivenessRange* IMLRA_SplitRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToUsage = false);

void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange);

// cost estimation
sint32 IMLRA_GetSegmentReadWriteCost(IMLSegment* imlSegment);
sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange);
//sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, sint32 splitIndex);
sint32 IMLRA_CalculateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition);

133
src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp
Normal file
@@ -0,0 +1,133 @@
#include "IMLInstruction.h"
#include "IMLSegment.h"

void IMLSegment::SetEnterable(uint32 enterAddress)
{
	cemu_assert_debug(!isEnterable || enterPPCAddress == enterAddress);
	isEnterable = true;
	enterPPCAddress = enterAddress;
}

bool IMLSegment::HasSuffixInstruction() const
{
	if (imlList.empty())
		return false;
	const IMLInstruction& imlInstruction = imlList.back();
	return imlInstruction.IsSuffixInstruction();
}

sint32 IMLSegment::GetSuffixInstructionIndex() const
{
	cemu_assert_debug(HasSuffixInstruction());
	return (sint32)(imlList.size() - 1);
}

IMLInstruction* IMLSegment::GetLastInstruction()
{
	if (imlList.empty())
		return nullptr;
	return &imlList.back();
}

void IMLSegment::SetLinkBranchNotTaken(IMLSegment* imlSegmentDst)
{
	if (nextSegmentBranchNotTaken)
		nextSegmentBranchNotTaken->list_prevSegments.erase(std::find(nextSegmentBranchNotTaken->list_prevSegments.begin(), nextSegmentBranchNotTaken->list_prevSegments.end(), this));
	nextSegmentBranchNotTaken = imlSegmentDst;
	if(imlSegmentDst)
		imlSegmentDst->list_prevSegments.push_back(this);
}

void IMLSegment::SetLinkBranchTaken(IMLSegment* imlSegmentDst)
{
	if (nextSegmentBranchTaken)
		nextSegmentBranchTaken->list_prevSegments.erase(std::find(nextSegmentBranchTaken->list_prevSegments.begin(), nextSegmentBranchTaken->list_prevSegments.end(), this));
	nextSegmentBranchTaken = imlSegmentDst;
	if (imlSegmentDst)
		imlSegmentDst->list_prevSegments.push_back(this);
}

IMLInstruction* IMLSegment::AppendInstruction()
{
	IMLInstruction& inst = imlList.emplace_back();
	memset(&inst, 0, sizeof(IMLInstruction));
	return &inst;
}

void IMLSegment_SetLinkBranchNotTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst)
{
	// make sure segments aren't already linked
	if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst)
		return;
	// add as next segment for source
	if (imlSegmentSrc->nextSegmentBranchNotTaken != nullptr)
		assert_dbg();
	imlSegmentSrc->nextSegmentBranchNotTaken = imlSegmentDst;
	// add as previous segment for destination
	imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc);
}

void IMLSegment_SetLinkBranchTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst)
{
	// make sure segments aren't already linked
	if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst)
		return;
	// add as next segment for source
	if (imlSegmentSrc->nextSegmentBranchTaken != nullptr)
		assert_dbg();
	imlSegmentSrc->nextSegmentBranchTaken = imlSegmentDst;
	// add as previous segment for destination
	imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc);
}

void IMLSegment_RemoveLink(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst)
{
	if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst)
	{
		imlSegmentSrc->nextSegmentBranchNotTaken = nullptr;
	}
	else if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst)
	{
		imlSegmentSrc->nextSegmentBranchTaken = nullptr;
	}
	else
		assert_dbg();

	bool matchFound = false;
	for (sint32 i = 0; i < imlSegmentDst->list_prevSegments.size(); i++)
	{
		if (imlSegmentDst->list_prevSegments[i] == imlSegmentSrc)
		{
			imlSegmentDst->list_prevSegments.erase(imlSegmentDst->list_prevSegments.begin() + i);
			matchFound = true;
			break;
		}
	}
	if (matchFound == false)
		assert_dbg();
}

/*
 * Replaces all links to segment orig with links to segment new
 */
void IMLSegment_RelinkInputSegment(IMLSegment* imlSegmentOrig, IMLSegment* imlSegmentNew)
{
	while (imlSegmentOrig->list_prevSegments.size() != 0)
	{
		IMLSegment* prevSegment = imlSegmentOrig->list_prevSegments[0];
		if (prevSegment->nextSegmentBranchNotTaken == imlSegmentOrig)
		{
			IMLSegment_RemoveLink(prevSegment, imlSegmentOrig);
			IMLSegment_SetLinkBranchNotTaken(prevSegment, imlSegmentNew);
		}
		else if (prevSegment->nextSegmentBranchTaken == imlSegmentOrig)
		{
			IMLSegment_RemoveLink(prevSegment, imlSegmentOrig);
			IMLSegment_SetLinkBranchTaken(prevSegment, imlSegmentNew);
		}
		else
		{
			assert_dbg();
		}
	}
}

193
src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h
Normal file
@@ -0,0 +1,193 @@
#pragma once
#include "IMLInstruction.h"

#include <boost/container/small_vector.hpp>

// special values to mark the index of ranges that reach across the segment border
#define RA_INTER_RANGE_START (-1)
#define RA_INTER_RANGE_END (0x70000000)

struct IMLSegmentPoint
{
	friend struct IMLSegmentInterval;

	sint32 index;
	struct IMLSegment* imlSegment; // do we really need to track this? SegmentPoints are always accessed via the segment that they are part of
	IMLSegmentPoint* next;
	IMLSegmentPoint* prev;

	// the index is the instruction index times two.
	// this gives us the ability to cover half an instruction with RA ranges
	// covering only the first half of an instruction (0-0) means that the register is read, but not preserved
	// covering first and the second half means the register is read and preserved
	// covering only the second half means the register is written but not read
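	// e.g. a range [2*i, 2*i] covers only the read half of instruction i (read, not
	// preserved), while [2*i, 2*i+1] covers both halves (read and kept live afterwards)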

	sint32 GetInstructionIndex() const
	{
		return index;
	}

	void SetInstructionIndex(sint32 index)
	{
		this->index = index;
	}

	void ShiftIfAfter(sint32 instructionIndex, sint32 shiftCount)
	{
		if (!IsPreviousSegment() && !IsNextSegment())
		{
			if (GetInstructionIndex() >= instructionIndex)
				index += shiftCount;
		}
	}

	void DecrementByOneInstruction()
	{
		index--;
	}

	// the segment point can point beyond the first and last instruction, which indicates that it is an infinite range reaching up to the previous or next segment
	bool IsPreviousSegment() const { return index == RA_INTER_RANGE_START; }
	bool IsNextSegment() const { return index == RA_INTER_RANGE_END; }

	// overload operators > and <
	bool operator>(const IMLSegmentPoint& other) const { return index > other.index; }
	bool operator<(const IMLSegmentPoint& other) const { return index < other.index; }
	bool operator==(const IMLSegmentPoint& other) const { return index == other.index; }
	bool operator!=(const IMLSegmentPoint& other) const { return index != other.index; }

	// overload comparison operators for sint32
	bool operator>(const sint32 other) const { return index > other; }
	bool operator<(const sint32 other) const { return index < other; }
	bool operator<=(const sint32 other) const { return index <= other; }
	bool operator>=(const sint32 other) const { return index >= other; }
};

struct IMLSegmentInterval
{
	IMLSegmentPoint start;
	IMLSegmentPoint end;

	bool ContainsInstructionIndex(sint32 offset) const { return start <= offset && end > offset; }

	bool IsRangeOverlapping(const IMLSegmentInterval& other)
	{
		// todo - compare the raw index
		sint32 r1start = this->start.GetInstructionIndex();
		sint32 r1end = this->end.GetInstructionIndex();
		sint32 r2start = other.start.GetInstructionIndex();
		sint32 r2end = other.end.GetInstructionIndex();
		if (r1start < r2end && r1end > r2start)
			return true;
		if (this->start.IsPreviousSegment() && r1start == r2start)
			return true;
		if (this->end.IsNextSegment() && r1end == r2end)
			return true;
		return false;
	}

	bool ExtendsIntoPreviousSegment() const
	{
		return start.IsPreviousSegment();
	}

	bool ExtendsIntoNextSegment() const
	{
		return end.IsNextSegment();
	}

	bool IsNextSegmentOnly() const
	{
		if(!start.IsNextSegment())
			return false;
		cemu_assert_debug(end.IsNextSegment());
		return true;
	}

	bool IsPreviousSegmentOnly() const
	{
		if (!end.IsPreviousSegment())
			return false;
		cemu_assert_debug(start.IsPreviousSegment());
		return true;
	}

	sint32 GetDistance() const
	{
		// todo - assert if either start or end is outside the segment
		// we may also want to switch this to raw indices?
		return end.GetInstructionIndex() - start.GetInstructionIndex();
	}
};

struct PPCSegmentRegisterAllocatorInfo_t
{
	// used during loop detection
	bool isPartOfProcessedLoop{};
	sint32 lastIterationIndex{};
	// linked lists
	struct raLivenessRange* linkedList_allSubranges{};
	std::unordered_map<IMLRegID, struct raLivenessRange*> linkedList_perVirtualRegister;
};

struct IMLSegment
{
	sint32 momentaryIndex{}; // index in segment list, generally not kept up to date except if needed (necessary for loop detection)
	sint32 loopDepth{};
	uint32 ppcAddress{}; // ppc address (0xFFFFFFFF if not associated with an address)
	uint32 x64Offset{}; // x64 code offset of segment start
	// list of intermediate instructions in this segment
	std::vector<IMLInstruction> imlList;
	// segment link
	IMLSegment* nextSegmentBranchNotTaken{}; // this is also the default for segments where there is no branch
	IMLSegment* nextSegmentBranchTaken{};
	bool nextSegmentIsUncertain{};
	std::vector<IMLSegment*> list_prevSegments{};
	// source for overwrite analysis (if nextSegmentIsUncertain is true)
	// sometimes a segment is marked as an exit point, but for the purposes of dead code elimination we know the next segment
	IMLSegment* deadCodeEliminationHintSeg{};
	std::vector<IMLSegment*> list_deadCodeHintBy{};
	// enterable segments
	bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary)
	uint32 enterPPCAddress{}; // used if isEnterable is true
	// register allocator info
	PPCSegmentRegisterAllocatorInfo_t raInfo{};
	// segment state API
	void SetEnterable(uint32 enterAddress);
	void SetLinkBranchNotTaken(IMLSegment* imlSegmentDst);
	void SetLinkBranchTaken(IMLSegment* imlSegmentDst);

	IMLSegment* GetBranchTaken()
	{
		return nextSegmentBranchTaken;
	}

	IMLSegment* GetBranchNotTaken()
	{
		return nextSegmentBranchNotTaken;
	}

	void SetNextSegmentForOverwriteHints(IMLSegment* seg)
	{
		cemu_assert_debug(!deadCodeEliminationHintSeg);
		deadCodeEliminationHintSeg = seg;
		if (seg)
			seg->list_deadCodeHintBy.push_back(this);
	}

	// instruction API
	IMLInstruction* AppendInstruction();

	bool HasSuffixInstruction() const;
	sint32 GetSuffixInstructionIndex() const;
	IMLInstruction* GetLastInstruction();

	// segment points
	IMLSegmentPoint* segmentPointList{};
};


void IMLSegment_SetLinkBranchNotTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst);
void IMLSegment_SetLinkBranchTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst);
void IMLSegment_RelinkInputSegment(IMLSegment* imlSegmentOrig, IMLSegment* imlSegmentNew);
void IMLSegment_RemoveLink(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst);

src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h
@@ -21,6 +21,16 @@ public:
	};

public:
	~PPCFunctionBoundaryTracker()
	{
		while (!map_ranges.empty())
		{
			PPCRange_t* range = *map_ranges.begin();
			delete range;
			map_ranges.erase(map_ranges.begin());
		}
	}

	void trackStartPoint(MPTR startAddress)
	{
		processRange(startAddress, nullptr, nullptr);

@@ -40,10 +50,34 @@ public:
		return false;
	}

	std::vector<PPCRange_t> GetRanges()
	{
		std::vector<PPCRange_t> r;
		for (auto& it : map_ranges)
			r.emplace_back(*it);
		return r;
	}

	bool ContainsAddress(uint32 addr) const
	{
		for (auto& it : map_ranges)
		{
			if (addr >= it->startAddress && addr < it->getEndAddress())
				return true;
		}
		return false;
	}

	const std::set<uint32>& GetBranchTargets() const
	{
		return map_branchTargetsAll;
	}

private:
	void addBranchDestination(PPCRange_t* sourceRange, MPTR address)
	{
		map_branchTargets.emplace(address);
		map_queuedBranchTargets.emplace(address);
		map_branchTargetsAll.emplace(address);
	}

	// process flow of instruction

@@ -114,7 +148,7 @@ private:
			Espresso::BOField BO;
			uint32 BI;
			bool LK;
-			Espresso::decodeOp_BCLR(opcode, BO, BI, LK);
+			Espresso::decodeOp_BCSPR(opcode, BO, BI, LK);
			if (BO.branchAlways() && !LK)
			{
				// unconditional BLR

@@ -218,7 +252,7 @@ private:
		auto rangeItr = map_ranges.begin();

		PPCRange_t* previousRange = nullptr;
-		for (std::set<uint32_t>::const_iterator targetItr = map_branchTargets.begin() ; targetItr != map_branchTargets.end(); )
+		for (std::set<uint32_t>::const_iterator targetItr = map_queuedBranchTargets.begin() ; targetItr != map_queuedBranchTargets.end(); )
		{
			while (rangeItr != map_ranges.end() && ((*rangeItr)->startAddress + (*rangeItr)->length) <= (*targetItr))
			{

@@ -239,7 +273,7 @@ private:
				(*targetItr) < ((*rangeItr)->startAddress + (*rangeItr)->length))
			{
				// delete visited targets
-				targetItr = map_branchTargets.erase(targetItr);
+				targetItr = map_queuedBranchTargets.erase(targetItr);
				continue;
			}

@@ -289,5 +323,6 @@ private:
	};

	std::set<PPCRange_t*, RangePtrCmp> map_ranges;
	std::set<uint32> map_branchTargets;
	std::set<uint32> map_queuedBranchTargets;
	std::set<uint32> map_branchTargetsAll;
};

src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp
@@ -2,7 +2,6 @@
#include "PPCFunctionBoundaryTracker.h"
#include "PPCRecompiler.h"
#include "PPCRecompilerIml.h"
-#include "PPCRecompilerX64.h"
#include "Cafe/OS/RPL/rpl.h"
#include "util/containers/RangeStore.h"
#include "Cafe/OS/libs/coreinit/coreinit_CodeGen.h"

@@ -14,6 +13,17 @@
#include "util/helpers/helpers.h"
#include "util/MemMapper/MemMapper.h"

#include "IML/IML.h"
#include "IML/IMLRegisterAllocator.h"
#include "BackendX64/BackendX64.h"
#ifdef __aarch64__
#include "BackendAArch64/BackendAArch64.h"
#endif
#include "util/highresolutiontimer/HighResolutionTimer.h"

#define PPCREC_FORCE_SYNCHRONOUS_COMPILATION 0 // if 1, then function recompilation will block and execute on the thread that called PPCRecompiler_visitAddressNoBlock
#define PPCREC_LOG_RECOMPILATION_RESULTS 0

struct PPCInvalidationRange
{
	MPTR startAddress;

@@ -37,11 +47,36 @@ void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_unvisited)();

PPCRecompilerInstanceData_t* ppcRecompilerInstanceData;

#if PPCREC_FORCE_SYNCHRONOUS_COMPILATION
static std::mutex s_singleRecompilationMutex;
#endif

bool ppcRecompilerEnabled = false;

void PPCRecompiler_recompileAtAddress(uint32 address);

// this function never blocks and can fail if the recompiler lock cannot be acquired immediately
void PPCRecompiler_visitAddressNoBlock(uint32 enterAddress)
{
#if PPCREC_FORCE_SYNCHRONOUS_COMPILATION
	if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] != PPCRecompiler_leaveRecompilerCode_unvisited)
		return;
	PPCRecompilerState.recompilerSpinlock.lock();
	if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] != PPCRecompiler_leaveRecompilerCode_unvisited)
	{
		PPCRecompilerState.recompilerSpinlock.unlock();
		return;
	}
	ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] = PPCRecompiler_leaveRecompilerCode_visited;
	PPCRecompilerState.recompilerSpinlock.unlock();
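	// note: the jump table entry is flipped to "visited" while the spinlock is held, so only
	// one thread ever queues a given address; the recompilation itself is then serialized by
	// s_singleRecompilationMutex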
	s_singleRecompilationMutex.lock();
	if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] == PPCRecompiler_leaveRecompilerCode_visited)
	{
		PPCRecompiler_recompileAtAddress(enterAddress);
	}
	s_singleRecompilationMutex.unlock();
	return;
#endif
	// quick read-only check without lock
	if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] != PPCRecompiler_leaveRecompilerCode_unvisited)
		return;

@@ -127,15 +162,15 @@ void PPCRecompiler_attemptEnter(PPCInterpreter_t* hCPU, uint32 enterAddress)
		PPCRecompiler_enter(hCPU, funcPtr);
	}
}
bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext);

-PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set<uint32>& entryAddresses, std::vector<std::pair<MPTR, uint32>>& entryPointsOut)
+PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set<uint32>& entryAddresses, std::vector<std::pair<MPTR, uint32>>& entryPointsOut, PPCFunctionBoundaryTracker& boundaryTracker)
{
	if (range.startAddress >= PPC_REC_CODE_AREA_END)
	{
		cemuLog_log(LogType::Force, "Attempting to recompile function outside of allowed code area");
		return nullptr;
	}

	uint32 codeGenRangeStart;
	uint32 codeGenRangeSize = 0;
	coreinit::OSGetCodegenVirtAddrRangeInternal(codeGenRangeStart, codeGenRangeSize);

@@ -153,29 +188,69 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
	PPCRecFunction_t* ppcRecFunc = new PPCRecFunction_t();
	ppcRecFunc->ppcAddress = range.startAddress;
	ppcRecFunc->ppcSize = range.length;

#if PPCREC_LOG_RECOMPILATION_RESULTS
	BenchmarkTimer bt;
	bt.Start();
#endif

	// generate intermediate code
	ppcImlGenContext_t ppcImlGenContext = { 0 };
-	bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses);
+	ppcImlGenContext.debug_entryPPCAddress = range.startAddress;
+	bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses, boundaryTracker);
	if (compiledSuccessfully == false)
	{
-		// todo: Free everything
-		PPCRecompiler_freeContext(&ppcImlGenContext);
		delete ppcRecFunc;
-		return NULL;
+		return nullptr;
	}

	uint32 ppcRecLowerAddr = LaunchSettings::GetPPCRecLowerAddr();
	uint32 ppcRecUpperAddr = LaunchSettings::GetPPCRecUpperAddr();

	if (ppcRecLowerAddr != 0 && ppcRecUpperAddr != 0)
	{
		if (ppcRecFunc->ppcAddress < ppcRecLowerAddr || ppcRecFunc->ppcAddress > ppcRecUpperAddr)
		{
			delete ppcRecFunc;
			return nullptr;
		}
	}

	// apply passes
	if (!PPCRecompiler_ApplyIMLPasses(ppcImlGenContext))
	{
		delete ppcRecFunc;
		return nullptr;
	}

#if defined(ARCH_X86_64)
	// emit x64 code
	bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext);
	if (x64GenerationSuccess == false)
	{
-		PPCRecompiler_freeContext(&ppcImlGenContext);
		return nullptr;
	}
#elif defined(__aarch64__)
	bool aarch64GenerationSuccess = PPCRecompiler_generateAArch64Code(ppcRecFunc, &ppcImlGenContext);
	if (aarch64GenerationSuccess == false)
	{
		return nullptr;
	}
#endif
	if (ActiveSettings::DumpRecompilerFunctionsEnabled())
	{
		FileStream* fs = FileStream::createFile2(ActiveSettings::GetUserDataPath(fmt::format("dump/recompiler/ppc_{:08x}.bin", ppcRecFunc->ppcAddress)));
		if (fs)
		{
			fs->writeData(ppcRecFunc->x86Code, ppcRecFunc->x86Size);
			delete fs;
		}
	}

	// collect list of PPC-->x64 entry points
	entryPointsOut.clear();
-	for (sint32 s = 0; s < ppcImlGenContext.segmentListCount; s++)
+	for(IMLSegment* imlSegment : ppcImlGenContext.segmentList2)
	{
-		PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s];
		if (imlSegment->isEnterable == false)
			continue;

@@ -185,10 +260,94 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
		entryPointsOut.emplace_back(ppcEnterOffset, x64Offset);
	}

-	PPCRecompiler_freeContext(&ppcImlGenContext);
#if PPCREC_LOG_RECOMPILATION_RESULTS
	bt.Stop();
	uint32 codeHash = 0;
	for (uint32 i = 0; i < ppcRecFunc->x86Size; i++)
	{
		codeHash = _rotr(codeHash, 3);
		codeHash += ((uint8*)ppcRecFunc->x86Code)[i];
	}
	cemuLog_log(LogType::Force, "[Recompiler] PPC 0x{:08x} -> x64: 0x{:x} Took {:.4}ms | Size {:04x} CodeHash {:08x}", (uint32)ppcRecFunc->ppcAddress, (uint64)(uintptr_t)ppcRecFunc->x86Code, bt.GetElapsedMilliseconds(), ppcRecFunc->x86Size, codeHash);
#endif

	return ppcRecFunc;
}

void PPCRecompiler_NativeRegisterAllocatorPass(ppcImlGenContext_t& ppcImlGenContext)
{
	IMLRegisterAllocatorParameters raParam;

	for (auto& it : ppcImlGenContext.mappedRegs)
		raParam.regIdToName.try_emplace(it.second.GetRegID(), it.first);

#if defined(ARCH_X86_64)
	auto& gprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::I64);
	gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RAX);
	gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX);
	gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RBX);
	gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RBP);
	gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RSI);
	gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDI);
	gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8);
	gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R9);
	gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R10);
	gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R11);
	gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R12);
	gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX);
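	// note: RSP and R13-R15 are never added to the pool here, presumably because the x64
	// backend reserves them for fixed purposes (analogous to XMM15 being kept as temporary below)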

	// add XMM registers, except XMM15 which is the temporary register
	auto& fprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::F64);
	fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 0);
	fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 1);
	fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 2);
	fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 3);
	fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 4);
	fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 5);
	fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 6);
	fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 7);
	fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 8);
	fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 9);
	fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 10);
	fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 11);
	fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 12);
	fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 13);
	fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 14);
#elif defined(__aarch64__)
	auto& gprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::I64);
	for (auto i = IMLArchAArch64::PHYSREG_GPR_BASE; i < IMLArchAArch64::PHYSREG_GPR_BASE + IMLArchAArch64::PHYSREG_GPR_COUNT; i++)
	{
		if (i == IMLArchAArch64::PHYSREG_GPR_BASE + 18)
			continue; // Skip reserved platform register
		gprPhysPool.SetAvailable(i);
	}

	auto& fprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::F64);
	for (auto i = IMLArchAArch64::PHYSREG_FPR_BASE; i < IMLArchAArch64::PHYSREG_FPR_BASE + IMLArchAArch64::PHYSREG_FPR_COUNT; i++)
		fprPhysPool.SetAvailable(i);
#endif

	IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam);
}

bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext)
{
	// isolate entry points from function flow (enterable segments must not be the target of any other segment)
	// this simplifies logic during register allocation
	PPCRecompilerIML_isolateEnterableSegments(&ppcImlGenContext);

	// merge certain float load+store patterns
	IMLOptimizer_OptimizeDirectFloatCopies(&ppcImlGenContext);
	// delay byte swapping for certain load+store patterns
	IMLOptimizer_OptimizeDirectIntegerCopies(&ppcImlGenContext);

	IMLOptimizer_StandardOptimizationPass(ppcImlGenContext);

	PPCRecompiler_NativeRegisterAllocatorPass(ppcImlGenContext);

	return true;
}

bool PPCRecompiler_makeRecompiledFunctionActive(uint32 initialEntryPoint, PPCFunctionBoundaryTracker::PPCRange_t& range, PPCRecFunction_t* ppcRecFunc, std::vector<std::pair<MPTR, uint32>>& entryPoints)
{
	// update jump table

@@ -202,7 +361,7 @@ bool PPCRecompiler_makeRecompiledFunctionActive(uint32 initialEntryPoint, PPCFun
		return false;
	}

-	// check if the current range got invalidated in the time it took to recompile it
+	// check if the current range got invalidated during the time it took to recompile it
	bool isInvalidated = false;
	for (auto& invRange : PPCRecompilerState.invalidationRanges)
	{

@@ -280,7 +439,7 @@ void PPCRecompiler_recompileAtAddress(uint32 address)
	PPCRecompilerState.recompilerSpinlock.unlock();

	std::vector<std::pair<MPTR, uint32>> functionEntryPoints;
-	auto func = PPCRecompiler_recompileFunction(range, entryAddresses, functionEntryPoints);
+	auto func = PPCRecompiler_recompileFunction(range, entryAddresses, functionEntryPoints, funcBoundaries);

	if (!func)
	{

@@ -295,6 +454,10 @@ std::atomic_bool s_recompilerThreadStopSignal{false};
void PPCRecompiler_thread()
{
	SetThreadName("PPCRecompiler");
#if PPCREC_FORCE_SYNCHRONOUS_COMPILATION
	return;
#endif

	while (true)
	{
		if(s_recompilerThreadStopSignal)

@@ -475,44 +638,6 @@ void PPCRecompiler_invalidateRange(uint32 startAddr, uint32 endAddr)
#if defined(ARCH_X86_64)
void PPCRecompiler_initPlatform()
{
-	// mxcsr
-	ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOn = 0x1F80 | 0x8000;
-	ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOff = 0x1F80;
-}
-#else
-void PPCRecompiler_initPlatform()
-{
-
-}
-#endif
-
-void PPCRecompiler_init()
-{
-	if (ActiveSettings::GetCPUMode() == CPUMode::SinglecoreInterpreter)
-	{
-		ppcRecompilerEnabled = false;
-		return;
-	}
-	if (LaunchSettings::ForceInterpreter())
-	{
-		cemuLog_log(LogType::Force, "Recompiler disabled. Command line --force-interpreter was passed");
-		return;
-	}
-	if (ppcRecompilerInstanceData)
-	{
-		MemMapper::FreeReservation(ppcRecompilerInstanceData, sizeof(PPCRecompilerInstanceData_t));
-		ppcRecompilerInstanceData = nullptr;
-	}
-	debug_printf("Allocating %dMB for recompiler instance data...\n", (sint32)(sizeof(PPCRecompilerInstanceData_t) / 1024 / 1024));
-	ppcRecompilerInstanceData = (PPCRecompilerInstanceData_t*)MemMapper::ReserveMemory(nullptr, sizeof(PPCRecompilerInstanceData_t), MemMapper::PAGE_PERMISSION::P_RW);
-	MemMapper::AllocateMemory(&(ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom), sizeof(PPCRecompilerInstanceData_t) - offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom), MemMapper::PAGE_PERMISSION::P_RW, true);
-	PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions();
-
-	PPCRecompiler_allocateRange(0, 0x1000); // the first entry is used for fallback to interpreter
-	PPCRecompiler_allocateRange(mmuRange_TRAMPOLINE_AREA.getBase(), mmuRange_TRAMPOLINE_AREA.getSize());
-	PPCRecompiler_allocateRange(mmuRange_CODECAVE.getBase(), mmuRange_CODECAVE.getSize());
-
	// init x64 recompiler instance data
	ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[0] = 1ULL << 63ULL;
	ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[1] = 0ULL;
	ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[0] = 1ULL << 63ULL;

@@ -548,44 +673,45 @@
	ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[2] = ~0x80000000;
	ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[3] = ~0x80000000;

-	// setup GQR scale tables
-	for (uint32 i = 0; i < 32; i++)
-	{
-		float a = 1.0f / (float)(1u << i);
-		float b = 0;
-		if (i == 0)
-			b = 4294967296.0f;
-		else
-			b = (float)(1u << (32u - i));
-
-		float ar = (float)(1u << i);
-		float br = 0;
-		if (i == 0)
-			br = 1.0f / 4294967296.0f;
-		else
-			br = 1.0f / (float)(1u << (32u - i));
-
-		ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[i * 2 + 0] = a;
-		ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[i * 2 + 1] = 1.0f;
-		ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[(i + 32) * 2 + 0] = b;
-		ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[(i + 32) * 2 + 1] = 1.0f;
-
-		ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[i * 2 + 0] = a;
-		ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[i * 2 + 1] = a;
-		ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[(i + 32) * 2 + 0] = b;
-		ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[(i + 32) * 2 + 1] = b;
-
-		ppcRecompilerInstanceData->_psq_st_scale_ps0_1[i * 2 + 0] = ar;
-		ppcRecompilerInstanceData->_psq_st_scale_ps0_1[i * 2 + 1] = 1.0f;
-		ppcRecompilerInstanceData->_psq_st_scale_ps0_1[(i + 32) * 2 + 0] = br;
-		ppcRecompilerInstanceData->_psq_st_scale_ps0_1[(i + 32) * 2 + 1] = 1.0f;
-
-		ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[i * 2 + 0] = ar;
-		ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[i * 2 + 1] = ar;
-		ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[(i + 32) * 2 + 0] = br;
-		ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[(i + 32) * 2 + 1] = br;
-	}
+	// mxcsr
+	ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOn = 0x1F80 | 0x8000;
+	ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOff = 0x1F80;
+}
+#else
+void PPCRecompiler_initPlatform()
+{
+
+}
+#endif
+
+void PPCRecompiler_init()
+{
+	if (ActiveSettings::GetCPUMode() == CPUMode::SinglecoreInterpreter)
+	{
+		ppcRecompilerEnabled = false;
+		return;
+	}
+	if (LaunchSettings::ForceInterpreter() || LaunchSettings::ForceMultiCoreInterpreter())
+	{
+		cemuLog_log(LogType::Force, "Recompiler disabled. Command line --force-interpreter or force-multicore-interpreter was passed");
+		return;
+	}
+	if (ppcRecompilerInstanceData)
+	{
+		MemMapper::FreeReservation(ppcRecompilerInstanceData, sizeof(PPCRecompilerInstanceData_t));
+		ppcRecompilerInstanceData = nullptr;
+	}
+	debug_printf("Allocating %dMB for recompiler instance data...\n", (sint32)(sizeof(PPCRecompilerInstanceData_t) / 1024 / 1024));
+	ppcRecompilerInstanceData = (PPCRecompilerInstanceData_t*)MemMapper::ReserveMemory(nullptr, sizeof(PPCRecompilerInstanceData_t), MemMapper::PAGE_PERMISSION::P_RW);
+	MemMapper::AllocateMemory(&(ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom), sizeof(PPCRecompilerInstanceData_t) - offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom), MemMapper::PAGE_PERMISSION::P_RW, true);
+#ifdef ARCH_X86_64
+	PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions();
+#elif defined(__aarch64__)
+	PPCRecompilerAArch64Gen_generateRecompilerInterfaceFunctions();
+#endif
+	PPCRecompiler_allocateRange(0, 0x1000); // the first entry is used for fallback to interpreter
+	PPCRecompiler_allocateRange(mmuRange_TRAMPOLINE_AREA.getBase(), mmuRange_TRAMPOLINE_AREA.getSize());
+	PPCRecompiler_allocateRange(mmuRange_CODECAVE.getBase(), mmuRange_CODECAVE.getSize());
+
+	PPCRecompiler_initPlatform();

@@ -623,4 +749,4 @@ void PPCRecompiler_Shutdown()
		// mark as unmapped
		ppcRecompiler_reservedBlockMask[i] = false;
	}
}
}

src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h
@@ -1,4 +1,4 @@
-#include <vector>
+#pragma once

#define PPC_REC_CODE_AREA_START (0x00000000) // lower bound of executable memory area. Recompiler expects this address to be 0
#define PPC_REC_CODE_AREA_END (0x10000000) // upper bound of executable memory area

@@ -6,336 +6,113 @@

#define PPC_REC_ALIGN_TO_4MB(__v) (((__v)+4*1024*1024-1)&~(4*1024*1024-1))

-#define PPC_REC_MAX_VIRTUAL_GPR (40) // enough to store 32 GPRs + a few SPRs + temp registers (usually only 1-2)
+#define PPC_REC_MAX_VIRTUAL_GPR (40 + 32) // enough to store 32 GPRs + a few SPRs + temp registers (usually only 1-2)

-typedef struct
+struct ppcRecRange_t
{
	uint32 ppcAddress;
	uint32 ppcSize;
-	//void* x86Start;
-	//size_t x86Size;
	void* storedRange;
-}ppcRecRange_t;
+};

-typedef struct
+struct PPCRecFunction_t
{
	uint32 ppcAddress;
	uint32 ppcSize; // ppc code size of function
	void* x86Code; // pointer to x86 code
	size_t x86Size;
	std::vector<ppcRecRange_t> list_ranges;
-}PPCRecFunction_t;
+};

-#define PPCREC_IML_OP_FLAG_SIGNEXTEND (1<<0)
-#define PPCREC_IML_OP_FLAG_SWITCHENDIAN (1<<1)
-#define PPCREC_IML_OP_FLAG_NOT_EXPANDED (1<<2) // set for single-precision load instructions to indicate that the value should not be rounded to double-precision
-#define PPCREC_IML_OP_FLAG_UNUSED (1<<7) // used to mark instructions that are not used
-
-typedef struct
-{
-	uint8 type;
-	uint8 operation;
-	uint8 crRegister; // set to 0xFF if not set, not all IML instruction types support cr.
-	uint8 crMode; // only used when crRegister is valid, used to differentiate between various forms of condition flag set/clear behavior
-	uint32 crIgnoreMask; // bit set for every respective CR bit that doesn't need to be updated
-	uint32 associatedPPCAddress; // ppc address that is associated with this instruction
-	union
-	{
-		struct
-		{
-			uint8 _padding[7];
-		}padding;
-		struct
-		{
-			// R (op) A [update cr* in mode *]
-			uint8 registerResult;
-			uint8 registerA;
-		}op_r_r;
-		struct
-		{
-			// R = A (op) B [update cr* in mode *]
-			uint8 registerResult;
-			uint8 registerA;
-			uint8 registerB;
-		}op_r_r_r;
-		struct
-		{
-			// R = A (op) immS32 [update cr* in mode *]
-			uint8 registerResult;
-			uint8 registerA;
-			sint32 immS32;
-		}op_r_r_s32;
-		struct
-		{
-			// R/F = NAME or NAME = R/F
-			uint8 registerIndex;
-			uint8 copyWidth;
-			uint32 name;
-			uint8 flags;
-		}op_r_name;
-		struct
-		{
-			// R (op) s32 [update cr* in mode *]
-			uint8 registerIndex;
-			sint32 immS32;
-		}op_r_immS32;
-		struct
-		{
-			uint32 address;
-			uint8 flags;
-		}op_jumpmark;
-		struct
-		{
-			uint32 param;
-			uint32 param2;
-			uint16 paramU16;
-		}op_macro;
-		struct
-		{
-			uint32 jumpmarkAddress;
-			bool jumpAccordingToSegment; //PPCRecImlSegment_t* destinationSegment; // if set, this replaces jumpmarkAddress
-			uint8 condition; // only used when crRegisterIndex is 8 or above (update: Apparently only used to mark jumps without a condition? -> Cleanup)
-			uint8 crRegisterIndex;
-			uint8 crBitIndex;
-			bool bitMustBeSet;
-		}op_conditionalJump;
-		struct
-		{
-			uint8 registerData;
-			uint8 registerMem;
-			uint8 registerMem2;
-			uint8 registerGQR;
-			uint8 copyWidth;
-			//uint8 flags;
-			struct
-			{
-				bool swapEndian : 1;
-				bool signExtend : 1;
-				bool notExpanded : 1; // for floats
-			}flags2;
-			uint8 mode; // transfer mode (copy width, ps0/ps1 behavior)
-			sint32 immS32;
-		}op_storeLoad;
-		struct
-		{
-			struct
-			{
-				uint8 registerMem;
-				sint32 immS32;
-			}src;
-			struct
-			{
-				uint8 registerMem;
-				sint32 immS32;
-			}dst;
-			uint8 copyWidth;
-		}op_mem2mem;
-		struct
-		{
-			uint8 registerResult;
-			uint8 registerOperand;
-			uint8 flags;
-		}op_fpr_r_r;
-		struct
-		{
-			uint8 registerResult;
-			uint8 registerOperandA;
-			uint8 registerOperandB;
-			uint8 flags;
-		}op_fpr_r_r_r;
-		struct
-		{
-			uint8 registerResult;
-			uint8 registerOperandA;
-			uint8 registerOperandB;
-			uint8 registerOperandC;
-			uint8 flags;
-		}op_fpr_r_r_r_r;
-		struct
-		{
-			uint8 registerResult;
-			//uint8 flags;
-		}op_fpr_r;
-		struct
-		{
-			uint32 ppcAddress;
-			uint32 x64Offset;
-		}op_ppcEnter;
-		struct
-		{
-			uint8 crD; // crBitIndex (result)
-			uint8 crA; // crBitIndex
-			uint8 crB; // crBitIndex
-		}op_cr;
-		// conditional operations (emitted if supported by target platform)
-		struct
-		{
-			// r_s32
-			uint8 registerIndex;
-			sint32 immS32;
-			// condition
-			uint8 crRegisterIndex;
-			uint8 crBitIndex;
-			bool bitMustBeSet;
-		}op_conditional_r_s32;
-	};
-}PPCRecImlInstruction_t;
-
-typedef struct _PPCRecImlSegment_t PPCRecImlSegment_t;
-
-typedef struct _ppcRecompilerSegmentPoint_t
-{
-	sint32 index;
-	PPCRecImlSegment_t* imlSegment;
-	_ppcRecompilerSegmentPoint_t* next;
-	_ppcRecompilerSegmentPoint_t* prev;
-}ppcRecompilerSegmentPoint_t;
-
-struct raLivenessLocation_t
-{
-	sint32 index;
-	bool isRead;
-	bool isWrite;
-
-	raLivenessLocation_t() = default;
-
-	raLivenessLocation_t(sint32 index, bool isRead, bool isWrite)
-		: index(index), isRead(isRead), isWrite(isWrite) {};
-};
-
-struct raLivenessSubrangeLink_t
-{
-	struct raLivenessSubrange_t* prev;
-	struct raLivenessSubrange_t* next;
-};
+#include "Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h"
+#include "Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h"

-struct raLivenessSubrange_t
-{
-	struct raLivenessRange_t* range;
-	PPCRecImlSegment_t* imlSegment;
-	ppcRecompilerSegmentPoint_t start;
-	ppcRecompilerSegmentPoint_t end;
-	// dirty state tracking
-	bool _noLoad;
-	bool hasStore;
-	bool hasStoreDelayed;
-	// next
-	raLivenessSubrange_t* subrangeBranchTaken;
-	raLivenessSubrange_t* subrangeBranchNotTaken;
-	// processing
-	uint32 lastIterationIndex;
-	// instruction locations
-	std::vector<raLivenessLocation_t> list_locations;
-	// linked list (subranges with same GPR virtual register)
-	raLivenessSubrangeLink_t link_sameVirtualRegisterGPR;
-	// linked list (all subranges for this segment)
-	raLivenessSubrangeLink_t link_segmentSubrangesGPR;
-};
-
-struct raLivenessRange_t
-{
-	sint32 virtualRegister;
-	sint32 physicalRegister;
-	sint32 name;
-	std::vector<raLivenessSubrange_t*> list_subranges;
-};
-
-struct PPCSegmentRegisterAllocatorInfo_t
-{
-	// analyzer stage
-	bool isPartOfProcessedLoop{}; // used during loop detection
-	sint32 lastIterationIndex{};
-	// linked lists
-	raLivenessSubrange_t* linkedList_allSubranges{};
-	raLivenessSubrange_t* linkedList_perVirtualGPR[PPC_REC_MAX_VIRTUAL_GPR]{};
-};
-
-struct PPCRecVGPRDistances_t
-{
-	struct _RegArrayEntry
-	{
-		sint32 usageStart{};
-		sint32 usageEnd{};
-	}reg[PPC_REC_MAX_VIRTUAL_GPR];
-	bool isProcessed[PPC_REC_MAX_VIRTUAL_GPR]{};
-};
-
-typedef struct _PPCRecImlSegment_t
-{
-	sint32 momentaryIndex{}; // index in segment list, generally not kept up to date except if needed (necessary for loop detection)
-	sint32 startOffset{}; // offset to first instruction in iml instruction list
-	sint32 count{}; // number of instructions in segment
-	uint32 ppcAddress{}; // ppc address (0xFFFFFFFF if not associated with an address)
-	uint32 x64Offset{}; // x64 code offset of segment start
-	uint32 cycleCount{}; // number of PPC cycles required to execute this segment (roughly)
-	// list of intermediate instructions in this segment
-	PPCRecImlInstruction_t* imlList{};
-	sint32 imlListSize{};
-	sint32 imlListCount{};
-	// segment link
-	_PPCRecImlSegment_t* nextSegmentBranchNotTaken{}; // this is also the default for segments where there is no branch
-	_PPCRecImlSegment_t* nextSegmentBranchTaken{};
-	bool nextSegmentIsUncertain{};
-	sint32 loopDepth{};
-	//sList_t* list_prevSegments;
-	std::vector<_PPCRecImlSegment_t*> list_prevSegments{};
-	// PPC range of segment
-	uint32 ppcAddrMin{};
-	uint32 ppcAddrMax{};
-	// enterable segments
-	bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary)
-	uint32 enterPPCAddress{}; // used if isEnterable is true
-	// jump destination segments
-	bool isJumpDestination{}; // segment is a destination for one or more (conditional) jumps
-	uint32 jumpDestinationPPCAddress{};
-	// PPC FPR use mask
-	bool ppcFPRUsed[32]{}; // same as ppcGPRUsed, but for FPR
-	// CR use mask
-	uint32 crBitsInput{}; // bits that are expected to be set from the previous segment (read in this segment but not overwritten)
-	uint32 crBitsRead{}; // all bits that are read in this segment
-	uint32 crBitsWritten{}; // bits that are written in this segment
-	// register allocator info
-	PPCSegmentRegisterAllocatorInfo_t raInfo{};
-	PPCRecVGPRDistances_t raDistances{};
-	bool raRangeExtendProcessed{};
-	// segment points
-	ppcRecompilerSegmentPoint_t* segmentPointList{};
-}PPCRecImlSegment_t;
+struct IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(struct ppcImlGenContext_t* ppcImlGenContext);

struct ppcImlGenContext_t
{
	PPCRecFunction_t* functionRef;
	class PPCFunctionBoundaryTracker* boundaryTracker;
	uint32* currentInstruction;
	uint32 ppcAddressOfCurrentInstruction;
	IMLSegment* currentOutputSegment;
	struct PPCBasicBlockInfo* currentBasicBlock{};
	// fpr mode
	bool LSQE{ true };
	bool PSE{ true };
	// cycle counter
	uint32 cyclesSinceLastBranch; // used to track ppc cycles
-	// temporary general purpose registers
-	uint32 mappedRegister[PPC_REC_MAX_VIRTUAL_GPR];
-	// temporary floating point registers (single and double precision)
-	uint32 mappedFPRRegister[256];
-	// list of intermediate instructions
-	PPCRecImlInstruction_t* imlList;
-	sint32 imlListSize;
-	sint32 imlListCount;
+	std::unordered_map<IMLName, IMLReg> mappedRegs;

	uint32 GetMaxRegId() const
	{
		if (mappedRegs.empty())
			return 0;
		return mappedRegs.size()-1;
	}
|
||||
|
||||
// list of segments
|
||||
PPCRecImlSegment_t** segmentList;
|
||||
sint32 segmentListSize;
|
||||
sint32 segmentListCount;
|
||||
std::vector<IMLSegment*> segmentList2;
|
||||
// code generation control
|
||||
bool hasFPUInstruction; // if true, PPCEnter macro will create FP_UNAVAIL checks -> Not needed in user mode
|
||||
// register allocator info
|
||||
struct
|
||||
{
|
||||
std::vector<raLivenessRange_t*> list_ranges;
|
||||
}raInfo;
|
||||
// analysis info
|
||||
struct
|
||||
{
|
||||
bool modifiesGQR[8];
|
||||
}tracking;
|
||||
// debug helpers
|
||||
uint32 debug_entryPPCAddress{0};
|
||||
|
||||
~ppcImlGenContext_t()
|
||||
{
|
||||
for (IMLSegment* imlSegment : segmentList2)
|
||||
delete imlSegment;
|
||||
segmentList2.clear();
|
||||
}
|
||||
|
||||
// append raw instruction
|
||||
IMLInstruction& emitInst()
|
||||
{
|
||||
return *PPCRecompilerImlGen_generateNewEmptyInstruction(this);
|
||||
}
|
||||
|
||||
IMLSegment* NewSegment()
|
||||
{
|
||||
IMLSegment* seg = new IMLSegment();
|
||||
segmentList2.emplace_back(seg);
|
||||
return seg;
|
||||
}
|
||||
|
||||
size_t GetSegmentIndex(IMLSegment* seg)
|
||||
{
|
||||
for (size_t i = 0; i < segmentList2.size(); i++)
|
||||
{
|
||||
if (segmentList2[i] == seg)
|
||||
return i;
|
||||
}
|
||||
cemu_assert_error();
|
||||
return 0;
|
||||
}
|
||||
|
||||
IMLSegment* InsertSegment(size_t index)
|
||||
{
|
||||
IMLSegment* newSeg = new IMLSegment();
|
||||
segmentList2.insert(segmentList2.begin() + index, 1, newSeg);
|
||||
return newSeg;
|
||||
}
|
||||
|
||||
std::span<IMLSegment*> InsertSegments(size_t index, size_t count)
|
||||
{
|
||||
segmentList2.insert(segmentList2.begin() + index, count, {});
|
||||
for (size_t i = index; i < (index + count); i++)
|
||||
segmentList2[i] = new IMLSegment();
|
||||
return { segmentList2.data() + index, count};
|
||||
}
|
||||
|
||||
void UpdateSegmentIndices()
|
||||
{
|
||||
for (size_t i = 0; i < segmentList2.size(); i++)
|
||||
segmentList2[i]->momentaryIndex = (sint32)i;
|
||||
}
|
||||
};
|
||||
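// --- Illustrative sketch (not part of the original source) ---
// A minimal example of how the segment helpers above are typically combined
// when splicing new segments into the list. The index 5 is hypothetical.
inline void exampleSpliceSegments(ppcImlGenContext_t& ctx)
{
	// insert two fresh, empty segments before the segment currently at index 5
	std::span<IMLSegment*> inserted = ctx.InsertSegments(5, 2);
	// momentaryIndex is only kept up to date on demand, so renumber after splicing
	ctx.UpdateSegmentIndices();
	(void)inserted; // both segments are owned by ctx and freed in its destructor
}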

typedef void ATTR_MS_ABI (*PPCREC_JUMP_ENTRY)();

@@ -359,11 +136,6 @@ typedef struct
	alignas(16) float _x64XMM_constFloatMin[2];
	alignas(16) uint32 _x64XMM_flushDenormalMask1[4];
	alignas(16) uint32 _x64XMM_flushDenormalMaskResetSignBits[4];
	// PSQ load/store scale tables
	double _psq_ld_scale_ps0_ps1[64 * 2];
	double _psq_ld_scale_ps0_1[64 * 2];
	double _psq_st_scale_ps0_ps1[64 * 2];
	double _psq_st_scale_ps0_1[64 * 2];
	// MXCSR
	uint32 _x64XMM_mxCsr_ftzOn;
	uint32 _x64XMM_mxCsr_ftzOff;

@@ -385,8 +157,6 @@ extern void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_unvisited)();

#define PPC_REC_INVALID_FUNCTION ((PPCRecFunction_t*)-1)

// todo - move some of the stuff above into PPCRecompilerInternal.h

// recompiler interface

void PPCRecompiler_recompileIfUnvisited(uint32 enterAddress);

@@ -1,293 +1,33 @@
bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set<uint32>& entryAddresses, class PPCFunctionBoundaryTracker& boundaryTracker);

#define PPCREC_CR_REG_TEMP 8 // there are only 8 cr registers (0-7), we use the 8th as a temporary cr register that is never stored (BDNZ instruction for example)
IMLSegment* PPCIMLGen_CreateSplitSegmentAtEnd(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo);
IMLSegment* PPCIMLGen_CreateNewSegmentAsBranchTarget(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo);

enum
{
	PPCREC_IML_OP_ASSIGN, // '=' operator
	PPCREC_IML_OP_ENDIAN_SWAP, // '=' operator with 32bit endian swap
	PPCREC_IML_OP_ADD, // '+' operator
	PPCREC_IML_OP_SUB, // '-' operator
	PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, // complex operation, result = operand + ~operand2 + carry bit, updates carry bit
	PPCREC_IML_OP_COMPARE_SIGNED, // arithmetic/signed comparison operator (updates cr)
	PPCREC_IML_OP_COMPARE_UNSIGNED, // logical/unsigned comparison operator (updates cr)
	PPCREC_IML_OP_MULTIPLY_SIGNED, // '*' operator (signed multiply)
	PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, // unsigned 64bit multiply, store only high 32bit-word of result
	PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, // signed 64bit multiply, store only high 32bit-word of result
	PPCREC_IML_OP_DIVIDE_SIGNED, // '/' operator (signed divide)
	PPCREC_IML_OP_DIVIDE_UNSIGNED, // '/' operator (unsigned divide)
	PPCREC_IML_OP_ADD_CARRY, // complex operation, result = operand + carry bit, updates carry bit
	PPCREC_IML_OP_ADD_CARRY_ME, // complex operation, result = operand + carry bit + (-1), updates carry bit
	PPCREC_IML_OP_ADD_UPDATE_CARRY, // '+' operator but also updates carry flag
	PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, // '+' operator and also adds carry, updates carry flag
	// assign operators with cast
	PPCREC_IML_OP_ASSIGN_S16_TO_S32, // copy 16bit and sign extend
	PPCREC_IML_OP_ASSIGN_S8_TO_S32, // copy 8bit and sign extend
	// binary operation
	PPCREC_IML_OP_OR, // '|' operator
	PPCREC_IML_OP_ORC, // '|' operator, second operand is complemented first
	PPCREC_IML_OP_AND, // '&' operator
	PPCREC_IML_OP_XOR, // '^' operator
	PPCREC_IML_OP_LEFT_ROTATE, // left rotate operator
	PPCREC_IML_OP_LEFT_SHIFT, // shift left operator
	PPCREC_IML_OP_RIGHT_SHIFT, // right shift operator (unsigned)
	PPCREC_IML_OP_NOT, // complement each bit
	PPCREC_IML_OP_NEG, // negate
	// ppc
	PPCREC_IML_OP_RLWIMI, // RLWIMI instruction (rotate, merge based on mask)
	PPCREC_IML_OP_SRAW, // SRAWI/SRAW instruction (algebraic shift right, sets ca flag)
	PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits)
	PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits)
	PPCREC_IML_OP_CNTLZW,
	PPCREC_IML_OP_SUBFC, // SUBFC and SUBFIC (subtract from and set carry)
	PPCREC_IML_OP_DCBZ, // clear 32 bytes aligned to 0x20
	PPCREC_IML_OP_MFCR, // copy cr to gpr
	PPCREC_IML_OP_MTCRF, // copy gpr to cr (with mask)
	// condition register
	PPCREC_IML_OP_CR_CLEAR, // clear cr bit
	PPCREC_IML_OP_CR_SET, // set cr bit
	PPCREC_IML_OP_CR_OR, // OR cr bits
	PPCREC_IML_OP_CR_ORC, // OR cr bits, complement second input operand bit first
	PPCREC_IML_OP_CR_AND, // AND cr bits
	PPCREC_IML_OP_CR_ANDC, // AND cr bits, complement second input operand bit first
	// FPU
	PPCREC_IML_OP_FPR_ADD_BOTTOM,
	PPCREC_IML_OP_FPR_ADD_PAIR,
	PPCREC_IML_OP_FPR_SUB_PAIR,
	PPCREC_IML_OP_FPR_SUB_BOTTOM,
	PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM,
	PPCREC_IML_OP_FPR_MULTIPLY_PAIR,
	PPCREC_IML_OP_FPR_DIVIDE_BOTTOM,
	PPCREC_IML_OP_FPR_DIVIDE_PAIR,
	PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP,
	PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP,
	PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM,
	PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP, // leave bottom of destination untouched
	PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, // leave bottom of destination untouched
	PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM, // leave top of destination untouched
	PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED,
	PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, // expand bottom f32 to f64 in bottom and top half
	PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP, // calculate reciprocal with Espresso accuracy of source bottom half and write result to destination bottom and top half
	PPCREC_IML_OP_FPR_FCMPO_BOTTOM,
	PPCREC_IML_OP_FPR_FCMPU_BOTTOM,
	PPCREC_IML_OP_FPR_FCMPU_TOP,
	PPCREC_IML_OP_FPR_NEGATE_BOTTOM,
	PPCREC_IML_OP_FPR_NEGATE_PAIR,
	PPCREC_IML_OP_FPR_ABS_BOTTOM, // abs(fp0)
	PPCREC_IML_OP_FPR_ABS_PAIR,
	PPCREC_IML_OP_FPR_FRES_PAIR, // 1.0/fp approx (Espresso accuracy)
	PPCREC_IML_OP_FPR_FRSQRTE_PAIR, // 1.0/sqrt(fp) approx (Espresso accuracy)
	PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM, // -abs(fp0)
	PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, // round 64bit double to 64bit double with 32bit float precision (in bottom half of xmm register)
	PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR, // round two 64bit doubles to 64bit double with 32bit float precision
	PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT,
	PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ,
	PPCREC_IML_OP_FPR_SELECT_BOTTOM, // selectively copy bottom value from operand B or C based on value in operand A
	PPCREC_IML_OP_FPR_SELECT_PAIR, // selectively copy top/bottom from operand B or C based on value in top/bottom of operand A
	// PS
	PPCREC_IML_OP_FPR_SUM0,
	PPCREC_IML_OP_FPR_SUM1,
};
void PPCIMLGen_AssertIfNotLastSegmentInstruction(ppcImlGenContext_t& ppcImlGenContext);

#define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN)

enum
{
	PPCREC_IML_MACRO_BLR, // macro for BLR instruction code
	PPCREC_IML_MACRO_BLRL, // macro for BLRL instruction code
	PPCREC_IML_MACRO_BCTR, // macro for BCTR instruction code
	PPCREC_IML_MACRO_BCTRL, // macro for BCTRL instruction code
	PPCREC_IML_MACRO_BL, // call to different function (can be within same function)
	PPCREC_IML_MACRO_B_FAR, // branch to different function
	PPCREC_IML_MACRO_COUNT_CYCLES, // decrease current remaining thread cycles by a certain amount
	PPCREC_IML_MACRO_HLE, // HLE function call
	PPCREC_IML_MACRO_MFTB, // get TB register value (low or high)
	PPCREC_IML_MACRO_LEAVE, // leaves recompiler and switches to interpreter
	// debugging
	PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak
};

enum
{
	PPCREC_JUMP_CONDITION_NONE,
	PPCREC_JUMP_CONDITION_E, // equal / zero
	PPCREC_JUMP_CONDITION_NE, // not equal / not zero
	PPCREC_JUMP_CONDITION_LE, // less or equal
	PPCREC_JUMP_CONDITION_L, // less
	PPCREC_JUMP_CONDITION_GE, // greater or equal
	PPCREC_JUMP_CONDITION_G, // greater
	// special case:
	PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW, // needs special handling
	PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW, // not summaryoverflow
};

enum
{
	PPCREC_CR_MODE_COMPARE_SIGNED,
	PPCREC_CR_MODE_COMPARE_UNSIGNED, // alias logic compare
	// others: PPCREC_CR_MODE_ARITHMETIC,
	PPCREC_CR_MODE_ARITHMETIC, // arithmetic use (for use with add/sub instructions without generating extra code)
	PPCREC_CR_MODE_LOGICAL,
};

enum
{
	PPCREC_IML_TYPE_NONE,
	PPCREC_IML_TYPE_NO_OP, // no-op instruction
	PPCREC_IML_TYPE_JUMPMARK, // possible jump destination (generated before each ppc instruction)
	PPCREC_IML_TYPE_R_R, // r* (op) *r
	PPCREC_IML_TYPE_R_R_R, // r* = r* (op) r*
	PPCREC_IML_TYPE_R_R_S32, // r* = r* (op) s32*
	PPCREC_IML_TYPE_LOAD, // r* = [r*+s32*]
	PPCREC_IML_TYPE_LOAD_INDEXED, // r* = [r*+r*]
	PPCREC_IML_TYPE_STORE, // [r*+s32*] = r*
	PPCREC_IML_TYPE_STORE_INDEXED, // [r*+r*] = r*
	PPCREC_IML_TYPE_R_NAME, // r* = name
	PPCREC_IML_TYPE_NAME_R, // name* = r*
	PPCREC_IML_TYPE_R_S32, // r* (op) imm
	PPCREC_IML_TYPE_MACRO,
	PPCREC_IML_TYPE_CJUMP, // conditional jump
	PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles >= 0
	PPCREC_IML_TYPE_PPC_ENTER, // used to mark locations that should be written to recompilerCallTable
	PPCREC_IML_TYPE_CR, // condition register specific operations (one or more operands)
	// conditional
	PPCREC_IML_TYPE_CONDITIONAL_R_S32,
	// FPR
	PPCREC_IML_TYPE_FPR_R_NAME, // name = f*
	PPCREC_IML_TYPE_FPR_NAME_R, // f* = name
	PPCREC_IML_TYPE_FPR_LOAD, // r* = (bitdepth) [r*+s32*] (single or paired single mode)
	PPCREC_IML_TYPE_FPR_LOAD_INDEXED, // r* = (bitdepth) [r*+r*] (single or paired single mode)
	PPCREC_IML_TYPE_FPR_STORE, // (bitdepth) [r*+s32*] = r* (single or paired single mode)
	PPCREC_IML_TYPE_FPR_STORE_INDEXED, // (bitdepth) [r*+r*] = r* (single or paired single mode)
	PPCREC_IML_TYPE_FPR_R_R,
	PPCREC_IML_TYPE_FPR_R_R_R,
	PPCREC_IML_TYPE_FPR_R_R_R_R,
	PPCREC_IML_TYPE_FPR_R,
	// special
	PPCREC_IML_TYPE_MEM2MEM, // memory to memory copy (deprecated)
};

enum
{
	PPCREC_NAME_NONE,
	PPCREC_NAME_TEMPORARY,
	PPCREC_NAME_R0 = 1000,
	PPCREC_NAME_SPR0 = 2000,
	PPCREC_NAME_FPR0 = 3000,
	PPCREC_NAME_TEMPORARY_FPR0 = 4000, // 0 to 7
	//PPCREC_NAME_CR0 = 3000, // value mapped condition register (usually it isn't needed and can be optimized away)
};

// special cases for LOAD/STORE
#define PPC_REC_LOAD_LWARX_MARKER (100) // lwarx instruction (similar to LWZX but sets reserved address/value)
#define PPC_REC_STORE_STWCX_MARKER (100) // stwcx instruction (similar to STWX but writes only if reservation from LWARX is valid)
#define PPC_REC_STORE_STSWI_1 (200) // stswi nb = 1
#define PPC_REC_STORE_STSWI_2 (201) // stswi nb = 2
#define PPC_REC_STORE_STSWI_3 (202) // stswi nb = 3
#define PPC_REC_STORE_LSWI_1 (200) // lswi nb = 1
#define PPC_REC_STORE_LSWI_2 (201) // lswi nb = 2
#define PPC_REC_STORE_LSWI_3 (202) // lswi nb = 3

#define PPC_REC_INVALID_REGISTER 0xFF

#define PPCREC_CR_BIT_LT 0
#define PPCREC_CR_BIT_GT 1
#define PPCREC_CR_BIT_EQ 2
#define PPCREC_CR_BIT_SO 3

enum
{
	// fpr load
	PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0,
	PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1,
	PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0,
	PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0,
	PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1,
	PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0,
	PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1,
	PPCREC_FPR_LD_MODE_PSQ_S16_PS0,
	PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1,
	PPCREC_FPR_LD_MODE_PSQ_U16_PS0,
	PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1,
	PPCREC_FPR_LD_MODE_PSQ_S8_PS0,
	PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1,
	PPCREC_FPR_LD_MODE_PSQ_U8_PS0,
	PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1,
	// fpr store
	PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, // store 1 single precision float from ps0
	PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, // store 1 double precision float from ps0

	PPCREC_FPR_ST_MODE_UI32_FROM_PS0, // store raw low-32bit of PS0

	PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1,
	PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0,
	PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1,
	PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0,
	PPCREC_FPR_ST_MODE_PSQ_S8_PS0,
	PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1,
	PPCREC_FPR_ST_MODE_PSQ_U8_PS0,
	PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1,
	PPCREC_FPR_ST_MODE_PSQ_U16_PS0,
	PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1,
	PPCREC_FPR_ST_MODE_PSQ_S16_PS0,
	PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1,
};

bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set<uint32>& entryAddresses);
void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext); // todo - move to destructor

PPCRecImlInstruction_t* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_pushBackIMLInstructions(PPCRecImlSegment_t* imlSegment, sint32 index, sint32 shiftBackCount);
PPCRecImlInstruction_t* PPCRecompiler_insertInstruction(PPCRecImlSegment_t* imlSegment, sint32 index);
IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_pushBackIMLInstructions(IMLSegment* imlSegment, sint32 index, sint32 shiftBackCount);
IMLInstruction* PPCRecompiler_insertInstruction(IMLSegment* imlSegment, sint32 index);

void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint32 index, sint32 count);

void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint, PPCRecImlSegment_t* imlSegment, sint32 index);
void PPCRecompilerIml_removeSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint);
void PPCRecompilerIml_setSegmentPoint(IMLSegmentPoint* segmentPoint, IMLSegment* imlSegment, sint32 index);
void PPCRecompilerIml_removeSegmentPoint(IMLSegmentPoint* segmentPoint);

// GPR register management
uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false);
uint32 PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);
// Register management
IMLReg PPCRecompilerImlGen_LookupReg(ppcImlGenContext_t* ppcImlGenContext, IMLName mappedName, IMLRegFormat regFormat);

// FPR register management
uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false);
uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);
IMLReg PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);

// IML instruction generation
void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 jumpmarkAddress);
void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction);

void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 copyWidth, bool signExtend, bool bigEndian, uint8 crRegister, uint32 crMode);
void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet);
void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0);


// IML instruction generation (new style, can generate new instructions but also overwrite existing ones)

void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction);
void PPCRecompilerImlGen_generateNewInstruction_memory_memory(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint8 srcMemReg, sint32 srcImmS32, uint8 dstMemReg, sint32 dstImmS32, uint8 copyWidth);

void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister = PPC_REC_INVALID_REGISTER);
void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, IMLReg registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet);

// IML generation - FPU
bool PPCRecompilerImlGen_LFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble);
bool PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble);
bool PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble);
bool PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool hasUpdate, bool isDouble);
bool PPCRecompilerImlGen_STFIWX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_STFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FMUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);

@@ -313,22 +53,17 @@ bool PPCRecompilerImlGen_FNEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
bool PPCRecompilerImlGen_FSEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FCTIWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate);
bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate);
bool PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1);
bool PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1);
bool PPCRecompilerImlGen_PS_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_SUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_DIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withNegative);
bool PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);

@@ -347,76 +82,20 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o

// IML general

bool PPCRecompiler_isSuffixInstruction(PPCRecImlInstruction_t* iml);
void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompilerIml_setLinkBranchNotTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst);
void PPCRecompilerIml_setLinkBranchTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst);
void PPCRecompilerIML_relinkInputSegment(PPCRecImlSegment_t* imlSegmentOrig, PPCRecImlSegment_t* imlSegmentNew);
void PPCRecompilerIML_removeLink(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst);
void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext);

PPCRecImlInstruction_t* PPCRecompilerIML_getLastInstruction(PPCRecImlSegment_t* imlSegment);
void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchTaken, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchNotTaken);
void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchNotTaken); // no else segment
void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count, sint32 defaultCaseIndex);

// IML analyzer
typedef struct
class IMLRedirectInstOutput
{
	uint32 readCRBits;
	uint32 writtenCRBits;
}PPCRecCRTracking_t;

bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment);
bool PPCRecompilerImlAnalyzer_canTypeWriteCR(PPCRecImlInstruction_t* imlInstruction);
void PPCRecompilerImlAnalyzer_getCRTracking(PPCRecImlInstruction_t* imlInstruction, PPCRecCRTracking_t* crTracking);

// IML optimizer
bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext);

bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext);

void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext);

void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext);

// IML register allocator
void PPCRecompilerImm_allocateRegisters(ppcImlGenContext_t* ppcImlGenContext);

// late optimizations
void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext);

// debug

void PPCRecompiler_dumpIMLSegment(PPCRecImlSegment_t* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false);
public:
	IMLRedirectInstOutput(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* outputSegment);
	~IMLRedirectInstOutput();

typedef struct
{
	union
	{
		struct
		{
			sint16 readNamedReg1;
			sint16 readNamedReg2;
			sint16 readNamedReg3;
			sint16 writtenNamedReg1;
		};
		sint16 gpr[4]; // 3 read + 1 write
	};
	// FPR
	union
	{
		struct
		{
			// note: If destination operand is not fully written, it will be added as a read FPR as well
			sint16 readFPR1;
			sint16 readFPR2;
			sint16 readFPR3;
			sint16 readFPR4; // usually this is set to the result FPR if only partially overwritten
			sint16 writtenFPR1;
		};
		sint16 fpr[4];
	};
}PPCImlOptimizerUsedRegisters_t;

void PPCRecompiler_checkRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, PPCImlOptimizerUsedRegisters_t* registersUsed);
private:
	ppcImlGenContext_t* m_context;
	IMLSegment* m_prevSegment;
};

@@ -1,137 +0,0 @@
#include "PPCRecompiler.h"
|
||||
#include "PPCRecompilerIml.h"
|
||||
#include "util/helpers/fixedSizeList.h"
|
||||
#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
|
||||
|
||||
/*
|
||||
* Initializes a single segment and returns true if it is a finite loop
|
||||
*/
|
||||
bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment)
|
||||
{
|
||||
bool isTightFiniteLoop = false;
|
||||
// base criteria, must jump to beginning of same segment
|
||||
if (imlSegment->nextSegmentBranchTaken != imlSegment)
|
||||
return false;
|
||||
// loops using BDNZ are assumed to always be finite
|
||||
for (sint32 t = 0; t < imlSegment->imlListCount; t++)
|
||||
{
|
||||
if (imlSegment->imlList[t].type == PPCREC_IML_TYPE_R_S32 && imlSegment->imlList[t].operation == PPCREC_IML_OP_SUB && imlSegment->imlList[t].crRegister == 8)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// for non-BDNZ loops, check for common patterns
|
||||
// risky approach, look for ADD/SUB operations and assume that potential overflow means finite (does not include r_r_s32 ADD/SUB)
|
||||
// this catches most loops with load-update and store-update instructions, but also those with decrementing counters
|
||||
FixedSizeList<sint32, 64, true> list_modifiedRegisters;
|
||||
for (sint32 t = 0; t < imlSegment->imlListCount; t++)
|
||||
{
|
||||
if (imlSegment->imlList[t].type == PPCREC_IML_TYPE_R_S32 && (imlSegment->imlList[t].operation == PPCREC_IML_OP_ADD || imlSegment->imlList[t].operation == PPCREC_IML_OP_SUB) )
|
||||
{
|
||||
list_modifiedRegisters.addUnique(imlSegment->imlList[t].op_r_immS32.registerIndex);
|
||||
}
|
||||
}
|
||||
if (list_modifiedRegisters.count > 0)
|
||||
{
|
||||
// remove all registers from the list that are modified by non-ADD/SUB instructions
|
||||
// todo: We should also cover the case where ADD+SUB on the same register cancel the effect out
|
||||
PPCImlOptimizerUsedRegisters_t registersUsed;
|
||||
for (sint32 t = 0; t < imlSegment->imlListCount; t++)
|
||||
{
|
||||
if (imlSegment->imlList[t].type == PPCREC_IML_TYPE_R_S32 && (imlSegment->imlList[t].operation == PPCREC_IML_OP_ADD || imlSegment->imlList[t].operation == PPCREC_IML_OP_SUB))
|
||||
continue;
|
||||
PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + t, ®istersUsed);
|
||||
if(registersUsed.writtenNamedReg1 < 0)
|
||||
continue;
|
||||
list_modifiedRegisters.remove(registersUsed.writtenNamedReg1);
|
||||
}
|
||||
if (list_modifiedRegisters.count > 0)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
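// --- Illustrative sketch (not part of the original source) ---
// A self-contained model of the BDNZ criterion above: the loop is assumed finite
// if the segment contains an R_S32 SUB targeting the temporary CR register 8
// (PPCREC_CR_REG_TEMP), which is what the recompiler emits for the CTR decrement.
// MiniInst and the constant values below are stand-ins, not the real IML types.
#include <vector>

struct MiniInst { int type; int operation; int crRegister; };
constexpr int MINI_TYPE_R_S32 = 1; // stand-in for PPCREC_IML_TYPE_R_S32
constexpr int MINI_OP_SUB = 2;     // stand-in for PPCREC_IML_OP_SUB
constexpr int MINI_CR_REG_TEMP = 8;

bool containsBdnzStyleDecrement(const std::vector<MiniInst>& insts)
{
	for (const MiniInst& inst : insts)
	{
		if (inst.type == MINI_TYPE_R_S32 && inst.operation == MINI_OP_SUB && inst.crRegister == MINI_CR_REG_TEMP)
			return true; // matches the "loops using BDNZ are assumed finite" rule
	}
	return false;
}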

/*
 * Returns true if the imlInstruction can overwrite CR (depending on value of ->crRegister)
 */
bool PPCRecompilerImlAnalyzer_canTypeWriteCR(PPCRecImlInstruction_t* imlInstruction)
{
	if (imlInstruction->type == PPCREC_IML_TYPE_R_R)
		return true;
	if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R)
		return true;
	if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32)
		return true;
	if (imlInstruction->type == PPCREC_IML_TYPE_R_S32)
		return true;
	if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R)
		return true;
	if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R)
		return true;
	if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R)
		return true;
	if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R)
		return true;
	return false;
}

void PPCRecompilerImlAnalyzer_getCRTracking(PPCRecImlInstruction_t* imlInstruction, PPCRecCRTracking_t* crTracking)
{
	crTracking->readCRBits = 0;
	crTracking->writtenCRBits = 0;
	if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP)
	{
		if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE)
		{
			uint32 crBitFlag = 1 << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + imlInstruction->op_conditionalJump.crBitIndex);
			crTracking->readCRBits = (crBitFlag);
		}
	}
	else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
	{
		uint32 crBitFlag = 1 << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex);
		crTracking->readCRBits = crBitFlag;
	}
	else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR)
	{
		crTracking->readCRBits = 0xFFFFFFFF;
	}
	else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF)
	{
		crTracking->writtenCRBits |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32);
	}
	else if (imlInstruction->type == PPCREC_IML_TYPE_CR)
	{
		if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR ||
			imlInstruction->operation == PPCREC_IML_OP_CR_SET)
		{
			uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD);
			crTracking->writtenCRBits = crBitFlag;
		}
		else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR ||
			imlInstruction->operation == PPCREC_IML_OP_CR_ORC ||
			imlInstruction->operation == PPCREC_IML_OP_CR_AND ||
			imlInstruction->operation == PPCREC_IML_OP_CR_ANDC)
		{
			uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD);
			crTracking->writtenCRBits = crBitFlag;
			crBitFlag = 1 << (imlInstruction->op_cr.crA);
			crTracking->readCRBits = crBitFlag;
			crBitFlag = 1 << (imlInstruction->op_cr.crB);
			crTracking->readCRBits |= crBitFlag;
		}
		else
			assert_dbg();
	}
	else if (PPCRecompilerImlAnalyzer_canTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7)
	{
		crTracking->writtenCRBits |= (0xF << (imlInstruction->crRegister * 4));
	}
	else if ((imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER)
	{
		// overwrites CR0
		crTracking->writtenCRBits |= (0xF << 0);
	}
}
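// --- Illustrative sketch (not part of the original source) ---
// How the 32-bit CR masks used by getCRTracking() are laid out: each of the 8
// CR fields occupies 4 consecutive bits (LT, GT, EQ, SO), so field N / bit B
// maps to mask bit N*4+B, matching "1 << (crRegisterIndex * 4 + crBitIndex)".
#include <cstdint>
#include <cstdio>

int main()
{
	for (uint32_t crField = 0; crField < 8; crField++)
	{
		uint32_t eqFlag = 1u << (crField * 4 + 2); // 2 == PPCREC_CR_BIT_EQ
		printf("cr%u.EQ -> 0x%08X\n", crField, eqFlag); // cr0.EQ -> 0x00000004 ... cr7.EQ -> 0x40000000
	}
	return 0;
}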
@@ -1,399 +0,0 @@
#include "PPCRecompiler.h"
|
||||
#include "PPCRecompilerIml.h"
|
||||
#include "PPCRecompilerX64.h"
|
||||
#include "PPCRecompilerImlRanges.h"
|
||||
#include "util/helpers/MemoryPool.h"
|
||||
|
||||
void PPCRecRARange_addLink_perVirtualGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange)
|
||||
{
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
if ((*root) && (*root)->range->virtualRegister != subrange->range->virtualRegister)
|
||||
assert_dbg();
|
||||
#endif
|
||||
subrange->link_sameVirtualRegisterGPR.next = *root;
|
||||
if (*root)
|
||||
(*root)->link_sameVirtualRegisterGPR.prev = subrange;
|
||||
subrange->link_sameVirtualRegisterGPR.prev = nullptr;
|
||||
*root = subrange;
|
||||
}
|
||||
|
||||
void PPCRecRARange_addLink_allSubrangesGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange)
|
||||
{
|
||||
subrange->link_segmentSubrangesGPR.next = *root;
|
||||
if (*root)
|
||||
(*root)->link_segmentSubrangesGPR.prev = subrange;
|
||||
subrange->link_segmentSubrangesGPR.prev = nullptr;
|
||||
*root = subrange;
|
||||
}
|
||||
|
||||
void PPCRecRARange_removeLink_perVirtualGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange)
|
||||
{
|
||||
raLivenessSubrange_t* tempPrev = subrange->link_sameVirtualRegisterGPR.prev;
|
||||
if (subrange->link_sameVirtualRegisterGPR.prev)
|
||||
subrange->link_sameVirtualRegisterGPR.prev->link_sameVirtualRegisterGPR.next = subrange->link_sameVirtualRegisterGPR.next;
|
||||
else
|
||||
(*root) = subrange->link_sameVirtualRegisterGPR.next;
|
||||
if (subrange->link_sameVirtualRegisterGPR.next)
|
||||
subrange->link_sameVirtualRegisterGPR.next->link_sameVirtualRegisterGPR.prev = tempPrev;
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
subrange->link_sameVirtualRegisterGPR.prev = (raLivenessSubrange_t*)1;
|
||||
subrange->link_sameVirtualRegisterGPR.next = (raLivenessSubrange_t*)1;
|
||||
#endif
|
||||
}
|
||||
|
||||
void PPCRecRARange_removeLink_allSubrangesGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange)
|
||||
{
|
||||
raLivenessSubrange_t* tempPrev = subrange->link_segmentSubrangesGPR.prev;
|
||||
if (subrange->link_segmentSubrangesGPR.prev)
|
||||
subrange->link_segmentSubrangesGPR.prev->link_segmentSubrangesGPR.next = subrange->link_segmentSubrangesGPR.next;
|
||||
else
|
||||
(*root) = subrange->link_segmentSubrangesGPR.next;
|
||||
if (subrange->link_segmentSubrangesGPR.next)
|
||||
subrange->link_segmentSubrangesGPR.next->link_segmentSubrangesGPR.prev = tempPrev;
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
subrange->link_segmentSubrangesGPR.prev = (raLivenessSubrange_t*)1;
|
||||
subrange->link_segmentSubrangesGPR.next = (raLivenessSubrange_t*)1;
|
||||
#endif
|
||||
}
|
||||
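// --- Illustrative sketch (not part of the original source) ---
// The four functions above are one pattern instantiated twice: an intrusive
// doubly-linked list threaded through the subrange objects, with the head
// pointer stored in the segment. A minimal stand-alone version of the pattern,
// assuming a plain node type:
struct ExampleNode
{
	ExampleNode* prev;
	ExampleNode* next;
};

void examplePushFront(ExampleNode** root, ExampleNode* n)
{
	n->next = *root;
	if (*root)
		(*root)->prev = n;
	n->prev = nullptr;
	*root = n; // the new node becomes the head
}

void exampleRemove(ExampleNode** root, ExampleNode* n)
{
	if (n->prev)
		n->prev->next = n->next;
	else
		*root = n->next; // n was the head
	if (n->next)
		n->next->prev = n->prev;
}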

MemoryPoolPermanentObjects<raLivenessRange_t> memPool_livenessRange(4096);
MemoryPoolPermanentObjects<raLivenessSubrange_t> memPool_livenessSubrange(4096);

raLivenessRange_t* PPCRecRA_createRangeBase(ppcImlGenContext_t* ppcImlGenContext, uint32 virtualRegister, uint32 name)
{
	raLivenessRange_t* livenessRange = memPool_livenessRange.acquireObj();
	livenessRange->list_subranges.resize(0);
	livenessRange->virtualRegister = virtualRegister;
	livenessRange->name = name;
	livenessRange->physicalRegister = -1;
	ppcImlGenContext->raInfo.list_ranges.push_back(livenessRange);
	return livenessRange;
}

raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, PPCRecImlSegment_t* imlSegment, sint32 startIndex, sint32 endIndex)
{
	raLivenessSubrange_t* livenessSubrange = memPool_livenessSubrange.acquireObj();
	livenessSubrange->list_locations.resize(0);
	livenessSubrange->range = range;
	livenessSubrange->imlSegment = imlSegment;
	PPCRecompilerIml_setSegmentPoint(&livenessSubrange->start, imlSegment, startIndex);
	PPCRecompilerIml_setSegmentPoint(&livenessSubrange->end, imlSegment, endIndex);
	// default values
	livenessSubrange->hasStore = false;
	livenessSubrange->hasStoreDelayed = false;
	livenessSubrange->lastIterationIndex = 0;
	livenessSubrange->subrangeBranchNotTaken = nullptr;
	livenessSubrange->subrangeBranchTaken = nullptr;
	livenessSubrange->_noLoad = false;
	// add to range
	range->list_subranges.push_back(livenessSubrange);
	// add to segment
	PPCRecRARange_addLink_perVirtualGPR(&(imlSegment->raInfo.linkedList_perVirtualGPR[range->virtualRegister]), livenessSubrange);
	PPCRecRARange_addLink_allSubrangesGPR(&imlSegment->raInfo.linkedList_allSubranges, livenessSubrange);
	return livenessSubrange;
}

void _unlinkSubrange(raLivenessSubrange_t* subrange)
{
	PPCRecImlSegment_t* imlSegment = subrange->imlSegment;
	PPCRecRARange_removeLink_perVirtualGPR(&imlSegment->raInfo.linkedList_perVirtualGPR[subrange->range->virtualRegister], subrange);
	PPCRecRARange_removeLink_allSubrangesGPR(&imlSegment->raInfo.linkedList_allSubranges, subrange);
}

void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange)
{
	_unlinkSubrange(subrange);
	subrange->range->list_subranges.erase(std::find(subrange->range->list_subranges.begin(), subrange->range->list_subranges.end(), subrange));
	subrange->list_locations.clear();
	PPCRecompilerIml_removeSegmentPoint(&subrange->start);
	PPCRecompilerIml_removeSegmentPoint(&subrange->end);
	memPool_livenessSubrange.releaseObj(subrange);
}

void _PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange)
{
	_unlinkSubrange(subrange);
	PPCRecompilerIml_removeSegmentPoint(&subrange->start);
	PPCRecompilerIml_removeSegmentPoint(&subrange->end);
	memPool_livenessSubrange.releaseObj(subrange);
}

void PPCRecRA_deleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range)
{
	for (auto& subrange : range->list_subranges)
	{
		_PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext, subrange);
	}
	ppcImlGenContext->raInfo.list_ranges.erase(std::find(ppcImlGenContext->raInfo.list_ranges.begin(), ppcImlGenContext->raInfo.list_ranges.end(), range));
	memPool_livenessRange.releaseObj(range);
}

void PPCRecRA_deleteRangeNoUnlink(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range)
{
	for (auto& subrange : range->list_subranges)
	{
		_PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext, subrange);
	}
	memPool_livenessRange.releaseObj(range);
}

void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext)
{
	for(auto& range : ppcImlGenContext->raInfo.list_ranges)
	{
		PPCRecRA_deleteRangeNoUnlink(ppcImlGenContext, range);
	}
	ppcImlGenContext->raInfo.list_ranges.clear();
}

void PPCRecRA_mergeRanges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, raLivenessRange_t* absorbedRange)
{
	cemu_assert_debug(range != absorbedRange);
	cemu_assert_debug(range->virtualRegister == absorbedRange->virtualRegister);
	// move all subranges from absorbedRange to range
	for (auto& subrange : absorbedRange->list_subranges)
	{
		range->list_subranges.push_back(subrange);
		subrange->range = range;
	}
	absorbedRange->list_subranges.clear();
	PPCRecRA_deleteRange(ppcImlGenContext, absorbedRange);
}

void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, raLivenessSubrange_t* absorbedSubrange)
{
#ifdef CEMU_DEBUG_ASSERT
	PPCRecRA_debugValidateSubrange(subrange);
	PPCRecRA_debugValidateSubrange(absorbedSubrange);
	if (subrange->imlSegment != absorbedSubrange->imlSegment)
		assert_dbg();
	if (subrange->end.index > absorbedSubrange->start.index)
		assert_dbg();
	if (subrange->subrangeBranchTaken || subrange->subrangeBranchNotTaken)
		assert_dbg();
	if (subrange == absorbedSubrange)
		assert_dbg();
#endif
	subrange->subrangeBranchTaken = absorbedSubrange->subrangeBranchTaken;
	subrange->subrangeBranchNotTaken = absorbedSubrange->subrangeBranchNotTaken;

	// merge usage locations
	for (auto& location : absorbedSubrange->list_locations)
	{
		subrange->list_locations.push_back(location);
	}
	absorbedSubrange->list_locations.clear();

	subrange->end.index = absorbedSubrange->end.index;

	PPCRecRA_debugValidateSubrange(subrange);

	PPCRecRA_deleteSubrange(ppcImlGenContext, absorbedSubrange);
}

// remove all inter-segment connections from the range and split it into local ranges (also removes empty ranges)
void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range)
{
	if (range->list_subranges.size() == 1)
		assert_dbg();
	for (auto& subrange : range->list_subranges)
	{
		if (subrange->list_locations.empty())
			continue;
		raLivenessRange_t* newRange = PPCRecRA_createRangeBase(ppcImlGenContext, range->virtualRegister, range->name);
		raLivenessSubrange_t* newSubrange = PPCRecRA_createSubrange(ppcImlGenContext, newRange, subrange->imlSegment, subrange->list_locations.data()[0].index, subrange->list_locations.data()[subrange->list_locations.size() - 1].index + 1);
		// copy locations
		for (auto& location : subrange->list_locations)
		{
			newSubrange->list_locations.push_back(location);
		}
	}
	// remove original range
	PPCRecRA_deleteRange(ppcImlGenContext, range);
}

#ifdef CEMU_DEBUG_ASSERT
void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange)
{
	// validate subrange
	if (subrange->subrangeBranchTaken && subrange->subrangeBranchTaken->imlSegment != subrange->imlSegment->nextSegmentBranchTaken)
		assert_dbg();
	if (subrange->subrangeBranchNotTaken && subrange->subrangeBranchNotTaken->imlSegment != subrange->imlSegment->nextSegmentBranchNotTaken)
		assert_dbg();
}
#else
void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange) {}
#endif

// split subrange at the given index
// After the split there will be two ranges/subranges:
// head -> subrange is shortened to end at splitIndex
// tail -> a new subrange that reaches from splitIndex to the end of the original subrange
// if head has a physical register assigned it will not carry over to tail
// The return value is the tail subrange
// If trimToHole is true, the end of the head subrange and the start of the tail subrange will be moved to fit the locations
// Ranges that begin at RA_INTER_RANGE_START are allowed and can be split
raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, sint32 splitIndex, bool trimToHole)
{
	// validation
#ifdef CEMU_DEBUG_ASSERT
	if (subrange->end.index == RA_INTER_RANGE_END || subrange->end.index == RA_INTER_RANGE_START)
		assert_dbg();
	if (subrange->start.index >= splitIndex)
		assert_dbg();
	if (subrange->end.index <= splitIndex)
		assert_dbg();
#endif
	// create tail
	raLivenessRange_t* tailRange = PPCRecRA_createRangeBase(ppcImlGenContext, subrange->range->virtualRegister, subrange->range->name);
	raLivenessSubrange_t* tailSubrange = PPCRecRA_createSubrange(ppcImlGenContext, tailRange, subrange->imlSegment, splitIndex, subrange->end.index);
	// copy locations
	for (auto& location : subrange->list_locations)
	{
		if (location.index >= splitIndex)
			tailSubrange->list_locations.push_back(location);
	}
	// remove tail locations from head
	for (sint32 i = 0; i < subrange->list_locations.size(); i++)
	{
		raLivenessLocation_t* location = subrange->list_locations.data() + i;
		if (location->index >= splitIndex)
		{
			subrange->list_locations.resize(i);
			break;
		}
	}
	// adjust start/end
	if (trimToHole)
	{
		if (subrange->list_locations.empty())
		{
			subrange->end.index = subrange->start.index+1;
		}
		else
		{
			subrange->end.index = subrange->list_locations.back().index + 1;
		}
		if (tailSubrange->list_locations.empty())
		{
			assert_dbg(); // should not happen? (In this case we can just avoid generating a tail at all)
		}
		else
		{
			tailSubrange->start.index = tailSubrange->list_locations.front().index;
		}
	}
	return tailSubrange;
}
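// --- Illustrative sketch (not part of the original source) ---
// The location-list half of the split above, reduced to a plain vector sorted
// by index (the same ordering the real code maintains): everything at
// index >= splitIndex moves to the tail, the head keeps the rest.
#include <vector>

struct ExampleLocation { sint32 index; };

std::vector<ExampleLocation> exampleSplitLocations(std::vector<ExampleLocation>& head, sint32 splitIndex)
{
	std::vector<ExampleLocation> tail;
	for (size_t i = 0; i < head.size(); i++)
	{
		if (head[i].index >= splitIndex)
		{
			tail.assign(head.begin() + i, head.end()); // copy tail locations
			head.resize(i);                            // trim them off the head
			break;
		}
	}
	return tail;
}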

void PPCRecRA_updateOrAddSubrangeLocation(raLivenessSubrange_t* subrange, sint32 index, bool isRead, bool isWrite)
{
	if (subrange->list_locations.empty())
	{
		subrange->list_locations.emplace_back(index, isRead, isWrite);
		return;
	}
	raLivenessLocation_t* lastLocation = subrange->list_locations.data() + (subrange->list_locations.size() - 1);
	cemu_assert_debug(lastLocation->index <= index);
	if (lastLocation->index == index)
	{
		// update
		lastLocation->isRead = lastLocation->isRead || isRead;
		lastLocation->isWrite = lastLocation->isWrite || isWrite;
		return;
	}
	// add new
	subrange->list_locations.emplace_back(index, isRead, isWrite);
}
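// --- Illustrative note (not part of the original source) ---
// Effect of the merge-at-same-index rule above, assuming an initially empty
// subrange sr and calls made in instruction order:
//   PPCRecRA_updateOrAddSubrangeLocation(sr, 10, true, false);  // appends {10, read}
//   PPCRecRA_updateOrAddSubrangeLocation(sr, 10, false, true);  // merges -> {10, read+write}
//   PPCRecRA_updateOrAddSubrangeLocation(sr, 12, false, true);  // appends {12, write}
// Because callers visit instructions in ascending order (see the assert),
// list_locations stays sorted by index.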

sint32 PPCRecRARange_getReadWriteCost(PPCRecImlSegment_t* imlSegment)
{
	sint32 v = imlSegment->loopDepth + 1;
	v *= 5;
	return v*v; // 25, 100, 225, 400
}
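// --- Illustrative sketch (not part of the original source) ---
// The cost curve above, spelled out: cost = ((loopDepth + 1) * 5)^2, so loads
// and stores inside nested loops get quadratically more expensive.
constexpr sint32 exampleReadWriteCost(sint32 loopDepth)
{
	sint32 v = (loopDepth + 1) * 5;
	return v * v;
}
static_assert(exampleReadWriteCost(0) == 25, "top level");
static_assert(exampleReadWriteCost(1) == 100, "one loop deep");
static_assert(exampleReadWriteCost(3) == 400, "three loops deep");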

// calculate cost of entire range
// ignores data flow and does not detect avoidable reads/stores
sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range)
{
	sint32 cost = 0;

	// todo - this algorithm isn't accurate. If we have 10 parallel branches with a load each then the actual cost is still only that of one branch (plus minimal extra cost for generating more code).

	// currently we calculate the cost based on the most expensive entry/exit point

	sint32 mostExpensiveRead = 0;
	sint32 mostExpensiveWrite = 0;
	sint32 readCount = 0;
	sint32 writeCount = 0;

	for (auto& subrange : range->list_subranges)
	{
		if (subrange->start.index != RA_INTER_RANGE_START)
		{
			//cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment);
			mostExpensiveRead = std::max(mostExpensiveRead, PPCRecRARange_getReadWriteCost(subrange->imlSegment));
			readCount++;
		}
		if (subrange->end.index != RA_INTER_RANGE_END)
		{
			//cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment);
			mostExpensiveWrite = std::max(mostExpensiveWrite, PPCRecRARange_getReadWriteCost(subrange->imlSegment));
			writeCount++;
		}
	}
	cost = mostExpensiveRead + mostExpensiveWrite;
	cost = cost + (readCount + writeCount) / 10;
	return cost;
}

// calculate the cost difference the range would incur after calling PPCRecRA_explodeRange() on it
sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t* range)
{
	sint32 cost = -PPCRecRARange_estimateCost(range);
	for (auto& subrange : range->list_subranges)
	{
		if (subrange->list_locations.empty())
			continue;
		cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment) * 2; // we assume a read and a store
	}
	return cost;
}

sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessSubrange_t* subrange, sint32 splitIndex)
{
	// validation
#ifdef CEMU_DEBUG_ASSERT
	if (subrange->end.index == RA_INTER_RANGE_END)
		assert_dbg();
#endif

	sint32 cost = 0;
	// find split position in location list
	if (subrange->list_locations.empty())
	{
		assert_dbg(); // should not happen?
		return 0;
	}
	if (splitIndex <= subrange->list_locations.front().index)
		return 0;
	if (splitIndex > subrange->list_locations.back().index)
		return 0;

	// todo - determine exact cost of split subranges

	cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment) * 2; // currently we assume that the additional region will require a read and a store

	//for (sint32 f = 0; f < subrange->list_locations.size(); f++)
	//{
	//	raLivenessLocation_t* location = subrange->list_locations.data() + f;
	//	if (location->index >= splitIndex)
	//	{
	//		...
	//		return cost;
	//	}
	//}

	return cost;
}

@@ -1,27 +0,0 @@
#pragma once

raLivenessRange_t* PPCRecRA_createRangeBase(ppcImlGenContext_t* ppcImlGenContext, uint32 virtualRegister, uint32 name);
raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, PPCRecImlSegment_t* imlSegment, sint32 startIndex, sint32 endIndex);
void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange);
void PPCRecRA_deleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range);
void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext);

void PPCRecRA_mergeRanges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, raLivenessRange_t* absorbedRange);
void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range);

void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, raLivenessSubrange_t* absorbedSubrange);

raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, sint32 splitIndex, bool trimToHole = false);

void PPCRecRA_updateOrAddSubrangeLocation(raLivenessSubrange_t* subrange, sint32 index, bool isRead, bool isWrite);
void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange);

// cost estimation
sint32 PPCRecRARange_getReadWriteCost(PPCRecImlSegment_t* imlSegment);
sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range);
sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t* range);
sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessSubrange_t* subrange, sint32 splitIndex);

// special values to mark the index of ranges that reach across the segment border
#define RA_INTER_RANGE_START (-1)
#define RA_INTER_RANGE_END (0x70000000)
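// --- Illustrative note (not part of the original source) ---
// How the sentinels are used: a subrange that is already live when control
// enters its segment starts at RA_INTER_RANGE_START (-1), i.e. "before
// instruction 0"; one that is still live when control leaves ends at
// RA_INTER_RANGE_END (0x70000000), i.e. "past any real instruction index".
// Ordinary comparisons against real instruction indices then work unchanged.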
@@ -1,414 +0,0 @@
#include "PPCRecompiler.h"
|
||||
#include "PPCRecompilerIml.h"
|
||||
#include "PPCRecompilerX64.h"
|
||||
#include "PPCRecompilerImlRanges.h"
|
||||
#include <queue>
|
||||
|
||||
bool _isRangeDefined(PPCRecImlSegment_t* imlSegment, sint32 vGPR)
|
||||
{
|
||||
return (imlSegment->raDistances.reg[vGPR].usageStart != INT_MAX);
|
||||
}
|
||||
|
||||
void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
|
||||
{
|
||||
for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++)
|
||||
{
|
||||
imlSegment->raDistances.reg[i].usageStart = INT_MAX;
|
||||
imlSegment->raDistances.reg[i].usageEnd = INT_MIN;
|
||||
}
|
||||
// scan instructions for usage range
|
||||
sint32 index = 0;
|
||||
PPCImlOptimizerUsedRegisters_t gprTracking;
|
||||
while (index < imlSegment->imlListCount)
|
||||
{
|
||||
// end loop at suffix instruction
|
||||
if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index))
|
||||
break;
|
||||
// get accessed GPRs
|
||||
PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking);
|
||||
for (sint32 t = 0; t < 4; t++)
|
||||
{
|
||||
sint32 virtualRegister = gprTracking.gpr[t];
|
||||
if (virtualRegister < 0)
|
||||
continue;
|
||||
cemu_assert_debug(virtualRegister < PPC_REC_MAX_VIRTUAL_GPR);
|
||||
imlSegment->raDistances.reg[virtualRegister].usageStart = std::min(imlSegment->raDistances.reg[virtualRegister].usageStart, index); // index before/at instruction
|
||||
imlSegment->raDistances.reg[virtualRegister].usageEnd = std::max(imlSegment->raDistances.reg[virtualRegister].usageEnd, index+1); // index after instruction
|
||||
}
|
||||
// next instruction
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
// for each register calculate min/max index of usage range within each segment
|
||||
for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
|
||||
{
|
||||
PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext, ppcImlGenContext->segmentList[s]);
|
||||
}
|
||||
}
|
||||
|
||||
raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR, raLivenessRange_t* range)
|
||||
{
|
||||
if (imlSegment->raDistances.isProcessed[vGPR])
|
||||
{
|
||||
// return already existing segment
|
||||
return imlSegment->raInfo.linkedList_perVirtualGPR[vGPR];
|
||||
}
|
||||
imlSegment->raDistances.isProcessed[vGPR] = true;
|
||||
if (_isRangeDefined(imlSegment, vGPR) == false)
|
||||
return nullptr;
|
||||
// create subrange
|
||||
cemu_assert_debug(imlSegment->raInfo.linkedList_perVirtualGPR[vGPR] == nullptr);
|
||||
raLivenessSubrange_t* subrange = PPCRecRA_createSubrange(ppcImlGenContext, range, imlSegment, imlSegment->raDistances.reg[vGPR].usageStart, imlSegment->raDistances.reg[vGPR].usageEnd);
|
||||
// traverse forward
|
||||
if (imlSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END)
|
||||
{
|
||||
if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START)
|
||||
{
|
||||
subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, vGPR, range);
|
||||
cemu_assert_debug(subrange->subrangeBranchTaken->start.index == RA_INTER_RANGE_START);
|
||||
}
|
||||
if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START)
|
||||
{
|
||||
subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, vGPR, range);
|
||||
cemu_assert_debug(subrange->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START);
|
||||
}
|
||||
}
|
||||
// traverse backward
|
||||
if (imlSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START)
|
||||
{
|
||||
for (auto& it : imlSegment->list_prevSegments)
|
||||
{
|
||||
if (it->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END)
|
||||
PPCRecRA_convertToMappedRanges(ppcImlGenContext, it, vGPR, range);
|
||||
}
|
||||
}
|
||||
// return subrange
|
||||
return subrange;
|
||||
}
|
||||
|
||||
void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
|
||||
{
|
||||
for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++)
|
||||
{
|
||||
if( _isRangeDefined(imlSegment, i) == false )
|
||||
continue;
|
||||
if( imlSegment->raDistances.isProcessed[i])
|
||||
continue;
|
||||
raLivenessRange_t* range = PPCRecRA_createRangeBase(ppcImlGenContext, i, ppcImlGenContext->mappedRegister[i]);
|
||||
PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment, i, range);
|
||||
}
|
||||
// create lookup table of ranges
|
||||
raLivenessSubrange_t* vGPR2Subrange[PPC_REC_MAX_VIRTUAL_GPR];
|
||||
for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++)
|
||||
{
|
||||
vGPR2Subrange[i] = imlSegment->raInfo.linkedList_perVirtualGPR[i];
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
if (vGPR2Subrange[i] && vGPR2Subrange[i]->link_sameVirtualRegisterGPR.next != nullptr)
|
||||
assert_dbg();
|
||||
#endif
|
||||
}
|
||||
// parse instructions and convert to locations
|
||||
sint32 index = 0;
|
||||
PPCImlOptimizerUsedRegisters_t gprTracking;
|
||||
while (index < imlSegment->imlListCount)
|
||||
{
|
||||
// end loop at suffix instruction
|
||||
if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index))
|
||||
break;
|
||||
// get accessed GPRs
|
||||
PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking);
|
||||
// handle accessed GPR
|
||||
for (sint32 t = 0; t < 4; t++)
|
||||
{
|
||||
sint32 virtualRegister = gprTracking.gpr[t];
|
||||
if (virtualRegister < 0)
|
||||
continue;
|
||||
bool isWrite = (t == 3);
|
||||
// add location
|
||||
PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[virtualRegister], index, isWrite == false, isWrite);
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
if (index < vGPR2Subrange[virtualRegister]->start.index)
|
||||
assert_dbg();
|
||||
if (index+1 > vGPR2Subrange[virtualRegister]->end.index)
|
||||
assert_dbg();
|
||||
#endif
|
||||
}
|
||||
// next instruction
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
void PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR)
|
||||
{
|
||||
if (_isRangeDefined(imlSegment, vGPR) == false)
|
||||
{
|
||||
imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_END;
|
||||
imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END;
|
||||
return;
|
||||
}
|
||||
imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END;
|
||||
}
|
||||
|
||||
void PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR)
|
||||
{
|
||||
if (_isRangeDefined(imlSegment, vGPR) == false)
|
||||
{
|
||||
imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START;
|
||||
imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_START;
|
||||
}
|
||||
else
|
||||
{
|
||||
imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START;
|
||||
}
|
||||
// propagate backwards
|
||||
for (auto& it : imlSegment->list_prevSegments)
|
||||
{
|
||||
PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, it, vGPR);
|
||||
}
|
||||
}
|
||||
|
||||
void _PPCRecRA_connectRanges(ppcImlGenContext_t* ppcImlGenContext, sint32 vGPR, PPCRecImlSegment_t** route, sint32 routeDepth)
|
||||
{
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
if (routeDepth < 2)
|
||||
assert_dbg();
|
||||
#endif
|
||||
// extend starting range to end of segment
|
||||
PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[0], vGPR);
|
||||
// extend all the connecting segments in both directions
|
||||
for (sint32 i = 1; i < (routeDepth - 1); i++)
|
||||
{
|
||||
PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[i], vGPR);
|
||||
PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[i], vGPR);
|
||||
}
|
||||
// extend the final segment towards the beginning
|
||||
PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[routeDepth-1], vGPR);
|
||||
}
|
||||
|
||||
void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR, sint32 distanceLeft, PPCRecImlSegment_t** route, sint32 routeDepth)
|
||||
{
|
||||
if (routeDepth >= 64)
|
||||
{
|
||||
cemuLog_logDebug(LogType::Force, "Recompiler RA route maximum depth exceeded for function 0x{:08x}", ppcImlGenContext->functionRef->ppcAddress);
|
||||
return;
|
||||
}
|
||||
route[routeDepth] = currentSegment;
|
||||
if (currentSegment->raDistances.reg[vGPR].usageStart == INT_MAX)
|
||||
{
|
||||
// measure distance to end of segment
|
||||
distanceLeft -= currentSegment->imlListCount;
|
||||
if (distanceLeft > 0)
|
||||
{
|
||||
if (currentSegment->nextSegmentBranchNotTaken)
|
||||
_PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, distanceLeft, route, routeDepth + 1);
|
||||
if (currentSegment->nextSegmentBranchTaken)
|
||||
_PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, distanceLeft, route, routeDepth + 1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
// measure distance to range
|
||||
if (currentSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_END)
|
||||
{
|
||||
if (distanceLeft < currentSegment->imlListCount)
|
||||
return; // range too far away
|
||||
}
|
||||
else if (currentSegment->raDistances.reg[vGPR].usageStart != RA_INTER_RANGE_START && currentSegment->raDistances.reg[vGPR].usageStart > distanceLeft)
|
||||
return; // out of range
|
||||
// found close range -> connect ranges
|
||||
_PPCRecRA_connectRanges(ppcImlGenContext, vGPR, route, routeDepth + 1);
|
||||
}
|
||||
}
|
||||
|
||||
void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR)
|
||||
{
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
if (currentSegment->raDistances.reg[vGPR].usageEnd < 0)
|
||||
assert_dbg();
|
||||
#endif
|
||||
// count instructions to end of initial segment
|
||||
if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_START)
|
||||
assert_dbg();
|
||||
sint32 instructionsUntilEndOfSeg;
|
||||
if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END)
|
||||
instructionsUntilEndOfSeg = 0;
|
||||
else
|
||||
instructionsUntilEndOfSeg = currentSegment->imlListCount - currentSegment->raDistances.reg[vGPR].usageEnd;
|
||||
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
if (instructionsUntilEndOfSeg < 0)
|
||||
assert_dbg();
|
||||
#endif
|
||||
sint32 remainingScanDist = 45 - instructionsUntilEndOfSeg;
|
||||
if (remainingScanDist <= 0)
|
||||
return; // can't reach end
|
||||
|
||||
// also dont forget: Extending is easier if we allow 'non symetric' branches. E.g. register range one enters one branch
|
||||
PPCRecImlSegment_t* route[64];
|
||||
route[0] = currentSegment;
|
||||
if (currentSegment->nextSegmentBranchNotTaken)
|
||||
{
|
||||
_PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, remainingScanDist, route, 1);
|
||||
}
|
||||
if (currentSegment->nextSegmentBranchTaken)
|
||||
{
|
||||
_PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, remainingScanDist, route, 1);
|
||||
}
|
||||
}
|
||||
|
||||
void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
|
||||
{
|
||||
for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries
|
||||
{
|
||||
if(imlSegment->raDistances.reg[i].usageStart == INT_MAX)
|
||||
continue; // not used
|
||||
// check and extend if possible
|
||||
PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, imlSegment, i);
|
||||
}
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable)
|
||||
assert_dbg();
|
||||
if ((imlSegment->nextSegmentBranchNotTaken != nullptr || imlSegment->nextSegmentBranchTaken != nullptr) && imlSegment->nextSegmentIsUncertain)
|
||||
assert_dbg();
|
||||
#endif
|
||||
}
|
||||
|
||||
void PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
|
||||
{
|
||||
std::vector<PPCRecImlSegment_t*> list_segments;
|
||||
list_segments.reserve(1000);
|
||||
sint32 index = 0;
|
||||
imlSegment->raRangeExtendProcessed = true;
|
||||
list_segments.push_back(imlSegment);
|
||||
while (index < list_segments.size())
|
||||
{
|
||||
PPCRecImlSegment_t* currentSegment = list_segments[index];
|
||||
PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext, currentSegment);
|
||||
// follow flow
|
||||
if (currentSegment->nextSegmentBranchNotTaken && currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed == false)
|
||||
{
|
||||
currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed = true;
|
||||
list_segments.push_back(currentSegment->nextSegmentBranchNotTaken);
|
||||
}
|
||||
if (currentSegment->nextSegmentBranchTaken && currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed == false)
|
||||
{
|
||||
currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed = true;
|
||||
list_segments.push_back(currentSegment->nextSegmentBranchTaken);
|
||||
}
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
void PPCRecRA_mergeCloseRangesV2(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
|
||||
if (imlSegment->list_prevSegments.empty())
|
||||
{
|
||||
if (imlSegment->raRangeExtendProcessed)
|
||||
assert_dbg(); // should not happen
|
||||
PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext, imlSegment);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
|
||||
auto localLoopDepth = imlSegment->loopDepth;
|
||||
if( localLoopDepth <= 0 )
|
||||
continue; // not inside a loop
|
||||
// look for loop exit
|
||||
bool hasLoopExit = false;
|
||||
if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->loopDepth < localLoopDepth)
|
||||
{
|
||||
hasLoopExit = true;
|
||||
}
|
||||
if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->loopDepth < localLoopDepth)
|
||||
{
|
||||
hasLoopExit = true;
|
||||
}
|
||||
if(hasLoopExit == false)
|
||||
continue;
|
||||
|
||||
// extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop)
|
||||
for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries
|
||||
{
|
||||
if (imlSegment->raDistances.reg[i].usageEnd != RA_INTER_RANGE_END)
|
||||
continue; // range not set or does not reach end of segment
|
||||
if(imlSegment->nextSegmentBranchTaken)
|
||||
PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, i);
|
||||
if(imlSegment->nextSegmentBranchNotTaken)
|
||||
PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
// merge close ranges
|
||||
PPCRecRA_mergeCloseRangesV2(ppcImlGenContext);
|
||||
// extra pass to move register stores out of loops
|
||||
PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext);
|
||||
// calculate liveness ranges
|
||||
for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
|
||||
PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext, imlSegment);
|
||||
}
|
||||
}
|
||||
|
||||
void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange)
|
||||
{
|
||||
bool isRead = false;
|
||||
bool isWritten = false;
|
||||
bool isOverwritten = false;
|
||||
for (auto& location : subrange->list_locations)
|
||||
{
|
||||
if (location.isRead)
|
||||
{
|
||||
isRead = true;
|
||||
}
|
||||
if (location.isWrite)
|
||||
{
|
||||
if (isRead == false)
|
||||
isOverwritten = true;
|
||||
isWritten = true;
|
||||
}
|
||||
}
|
||||
subrange->_noLoad = isOverwritten;
|
||||
subrange->hasStore = isWritten;
|
||||
|
||||
if (subrange->start.index == RA_INTER_RANGE_START)
|
||||
subrange->_noLoad = true;
|
||||
}
|
||||
|
||||
void _analyzeRangeDataFlow(raLivenessSubrange_t* subrange);
|
||||
|
||||
void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
// this function is called after _assignRegisters(), which means that all ranges are already final and wont change anymore
|
||||
// first do a per-subrange pass
|
||||
for (auto& range : ppcImlGenContext->raInfo.list_ranges)
|
||||
{
|
||||
for (auto& subrange : range->list_subranges)
|
||||
{
|
||||
PPCRecRA_analyzeSubrangeDataDependencyV2(subrange);
|
||||
}
|
||||
}
|
||||
// then do a second pass where we scan along subrange flow
|
||||
for (auto& range : ppcImlGenContext->raInfo.list_ranges)
|
||||
{
|
||||
for (auto& subrange : range->list_subranges) // todo - traversing this backwards should be faster and yield better results due to the nature of the algorithm
|
||||
{
|
||||
_analyzeRangeDataFlow(subrange);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,173 +1,26 @@
#include "PPCRecompiler.h"
#include "PPCRecompilerIml.h"

PPCRecImlSegment_t* PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcOffset)
{
	for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
	{
		if (ppcImlGenContext->segmentList[s]->isJumpDestination && ppcImlGenContext->segmentList[s]->jumpDestinationPPCAddress == ppcOffset)
		{
			return ppcImlGenContext->segmentList[s];
		}
	}
	debug_printf("PPCRecompiler_getSegmentByPPCJumpAddress(): Unable to find segment (ppcOffset 0x%08x)\n", ppcOffset);
	return NULL;
}

void PPCRecompilerIml_setLinkBranchNotTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst)
{
	// make sure segments aren't already linked
	if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst)
		return;
	// add as next segment for source
	if (imlSegmentSrc->nextSegmentBranchNotTaken != NULL)
		assert_dbg();
	imlSegmentSrc->nextSegmentBranchNotTaken = imlSegmentDst;
	// add as previous segment for destination
	imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc);
}

void PPCRecompilerIml_setLinkBranchTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst)
{
	// make sure segments aren't already linked
	if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst)
		return;
	// add as next segment for source
	if (imlSegmentSrc->nextSegmentBranchTaken != NULL)
		assert_dbg();
	imlSegmentSrc->nextSegmentBranchTaken = imlSegmentDst;
	// add as previous segment for destination
	imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc);
}

void PPCRecompilerIML_removeLink(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst)
{
	if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst)
	{
		imlSegmentSrc->nextSegmentBranchNotTaken = NULL;
	}
	else if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst)
	{
		imlSegmentSrc->nextSegmentBranchTaken = NULL;
	}
	else
		assert_dbg();

	bool matchFound = false;
	for (sint32 i = 0; i < imlSegmentDst->list_prevSegments.size(); i++)
	{
		if (imlSegmentDst->list_prevSegments[i] == imlSegmentSrc)
		{
			imlSegmentDst->list_prevSegments.erase(imlSegmentDst->list_prevSegments.begin() + i);
			matchFound = true;
			break;
		}
	}
	if (matchFound == false)
		assert_dbg();
}

/*
 * Replaces all links to segment orig with links to segment new
 */
void PPCRecompilerIML_relinkInputSegment(PPCRecImlSegment_t* imlSegmentOrig, PPCRecImlSegment_t* imlSegmentNew)
{
	while (imlSegmentOrig->list_prevSegments.size() != 0)
	{
		PPCRecImlSegment_t* prevSegment = imlSegmentOrig->list_prevSegments[0];
		if (prevSegment->nextSegmentBranchNotTaken == imlSegmentOrig)
		{
			PPCRecompilerIML_removeLink(prevSegment, imlSegmentOrig);
			PPCRecompilerIml_setLinkBranchNotTaken(prevSegment, imlSegmentNew);
		}
		else if (prevSegment->nextSegmentBranchTaken == imlSegmentOrig)
		{
			PPCRecompilerIML_removeLink(prevSegment, imlSegmentOrig);
			PPCRecompilerIml_setLinkBranchTaken(prevSegment, imlSegmentNew);
		}
		else
		{
			assert_dbg();
		}
	}
}

void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext)
{
	for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
	{
		PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];

		bool isLastSegment = (s + 1) >= ppcImlGenContext->segmentListCount;
		PPCRecImlSegment_t* nextSegment = isLastSegment ? NULL : ppcImlGenContext->segmentList[s + 1];
		// handle empty segment
		if (imlSegment->imlListCount == 0)
		{
			if (isLastSegment == false)
				PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList[s + 1]); // continue execution to next segment
			else
				imlSegment->nextSegmentIsUncertain = true;
			continue;
		}
		// check last instruction of segment
		PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + (imlSegment->imlListCount - 1);
		if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
		{
			// find destination segment by ppc jump address
			PPCRecImlSegment_t* jumpDestSegment = PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, imlInstruction->op_conditionalJump.jumpmarkAddress);
			if (jumpDestSegment)
			{
				if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE)
					PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, nextSegment);
				PPCRecompilerIml_setLinkBranchTaken(imlSegment, jumpDestSegment);
			}
			else
			{
				imlSegment->nextSegmentIsUncertain = true;
			}
		}
		else if (imlInstruction->type == PPCREC_IML_TYPE_MACRO)
		{
			// currently we assume that the next segment is unknown for all macros
			imlSegment->nextSegmentIsUncertain = true;
		}
		else
		{
			// all other instruction types do not branch
			//imlSegment->nextSegment[0] = nextSegment;
			PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, nextSegment);
			//imlSegment->nextSegmentIsUncertain = true;
		}
	}
}

void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext)
{
	sint32 initialSegmentCount = ppcImlGenContext->segmentListCount;
	for (sint32 i = 0; i < ppcImlGenContext->segmentListCount; i++)
	size_t initialSegmentCount = ppcImlGenContext->segmentList2.size();
	for (size_t i = 0; i < initialSegmentCount; i++)
	{
		PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[i];
		IMLSegment* imlSegment = ppcImlGenContext->segmentList2[i];
		if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable)
		{
			// spawn new segment at end
			PPCRecompilerIml_insertSegments(ppcImlGenContext, ppcImlGenContext->segmentListCount, 1);
			PPCRecImlSegment_t* entrySegment = ppcImlGenContext->segmentList[ppcImlGenContext->segmentListCount-1];
			PPCRecompilerIml_insertSegments(ppcImlGenContext, ppcImlGenContext->segmentList2.size(), 1);
			IMLSegment* entrySegment = ppcImlGenContext->segmentList2[ppcImlGenContext->segmentList2.size()-1];
			entrySegment->isEnterable = true;
			entrySegment->enterPPCAddress = imlSegment->enterPPCAddress;
			// create jump instruction
			PPCRecompiler_pushBackIMLInstructions(entrySegment, 0, 1);
			PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, entrySegment->imlList + 0);
			PPCRecompilerIml_setLinkBranchTaken(entrySegment, imlSegment);
			entrySegment->imlList.data()[0].make_jump();
			IMLSegment_SetLinkBranchTaken(entrySegment, imlSegment);
			// remove enterable flag from original segment
			imlSegment->isEnterable = false;
			imlSegment->enterPPCAddress = 0;
		}
	}
}

PPCRecImlInstruction_t* PPCRecompilerIML_getLastInstruction(PPCRecImlSegment_t* imlSegment)
{
	if (imlSegment->imlListCount == 0)
		return nullptr;
	return imlSegment->imlList + (imlSegment->imlListCount - 1);
}
}
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -47,8 +47,6 @@ struct LatteGPUState_t
gx2GPUSharedArea_t* sharedArea; // quick reference to shared area
MPTR sharedAreaAddr;
// other
// todo: Currently we have the command buffer logic implemented as a FIFO ringbuffer. On real HW it's handled as a series of command buffers that are pushed individually.
std::atomic<uint64> lastSubmittedCommandBufferTimestamp;
uint32 gx2InitCalled; // incremented every time GX2Init() is called
// OpenGL control
uint32 glVendor; // GLVENDOR_*

@ -75,8 +73,6 @@ struct LatteGPUState_t

extern LatteGPUState_t LatteGPUState;

extern uint8* gxRingBufferReadPtr; // currently active read pointer (gx2 ring buffer or display list)

// texture

#include "Cafe/HW/Latte/Core/LatteTexture.h"
@ -13,6 +13,7 @@
#include "Cafe/HW/Latte/Core/LattePM4.h"

#include "Cafe/OS/libs/coreinit/coreinit_Time.h"
#include "Cafe/OS/libs/TCL/TCL.h" // TCL currently handles the GPU command ringbuffer

#include "Cafe/CafeSystem.h"

@ -28,11 +29,6 @@ typedef uint32be* LatteCMDPtr;
#define LatteReadCMD() ((uint32)*(cmd++))
#define LatteSkipCMD(_nWords) cmd += (_nWords)

uint8* gxRingBufferReadPtr; // currently active read pointer (gx2 ring buffer or display list)
uint8* gx2CPParserDisplayListPtr;
uint8* gx2CPParserDisplayListStart; // used for debugging
uint8* gx2CPParserDisplayListEnd;

void LatteThread_HandleOSScreen();

void LatteThread_Exit();

@ -155,16 +151,12 @@ void LatteCP_signalEnterWait()
*/
uint32 LatteCP_readU32Deprc()
{
	uint32 v;
	uint8* gxRingBufferWritePtr;
	sint32 readDistance;
	// no display list active
	while (true)
	{
		gxRingBufferWritePtr = gx2WriteGatherPipe.writeGatherPtrGxBuffer[GX2::sGX2MainCoreIndex];
		readDistance = (sint32)(gxRingBufferWritePtr - gxRingBufferReadPtr);
		if (readDistance != 0)
			break;
		uint32 cmdWord;
		if (TCL::TCLGPUReadRBWord(cmdWord))
			return cmdWord;

		g_renderer->NotifyLatteCommandProcessorIdle(); // let the renderer know in case it wants to flush any commands
		performanceMonitor.gpuTime_idleTime.beginMeasuring();

@ -175,56 +167,8 @@ uint32 LatteCP_readU32Deprc()
		}
		LatteThread_HandleOSScreen(); // check if new frame was presented via OSScreen API

		readDistance = (sint32)(gxRingBufferWritePtr - gxRingBufferReadPtr);
		if (readDistance != 0)
			break;
		if (Latte_GetStopSignal())
			LatteThread_Exit();

		// still no command data available, do some other tasks
		LatteTiming_HandleTimedVsync();
		LatteAsyncCommands_checkAndExecute();
		std::this_thread::yield();
		performanceMonitor.gpuTime_idleTime.endMeasuring();
	}
	v = *(uint32*)gxRingBufferReadPtr;
	gxRingBufferReadPtr += 4;
#ifdef CEMU_DEBUG_ASSERT
	if (v == 0xcdcdcdcd)
		assert_dbg();
#endif
	v = _swapEndianU32(v);
	return v;
}

void LatteCP_waitForNWords(uint32 numWords)
{
	uint8* gxRingBufferWritePtr;
	sint32 readDistance;
	bool isFlushed = false;
	sint32 waitDistance = numWords * sizeof(uint32be);
	// no display list active
	while (true)
	{
		gxRingBufferWritePtr = gx2WriteGatherPipe.writeGatherPtrGxBuffer[GX2::sGX2MainCoreIndex];
		readDistance = (sint32)(gxRingBufferWritePtr - gxRingBufferReadPtr);
		if (readDistance < 0)
			return; // wrap around means there is at least one full command queued after this
		if (readDistance >= waitDistance)
			break;
		g_renderer->NotifyLatteCommandProcessorIdle(); // let the renderer know in case it wants to flush any commands
		performanceMonitor.gpuTime_idleTime.beginMeasuring();
		// no command data available, spin in a busy loop for a while then check again
		for (sint32 busy = 0; busy < 80; busy++)
		{
			_mm_pause();
		}
		readDistance = (sint32)(gxRingBufferWritePtr - gxRingBufferReadPtr);
		if (readDistance < 0)
			return; // wrap around means there is at least one full command queued after this
		if (readDistance >= waitDistance)
			break;

		if (TCL::TCLGPUReadRBWord(cmdWord))
			return cmdWord;
		if (Latte_GetStopSignal())
			LatteThread_Exit();

@ -234,6 +178,7 @@ void LatteCP_waitForNWords(uint32 numWords)
		std::this_thread::yield();
		performanceMonitor.gpuTime_idleTime.endMeasuring();
	}
	UNREACHABLE;
}

template<uint32 readU32()>

@ -270,21 +215,23 @@ void LatteCP_itIndirectBufferDepr(LatteCMDPtr cmd, uint32 nWords)
	cemu_assert_debug(nWords == 3);
	uint32 physicalAddress = LatteReadCMD();
	uint32 physicalAddressHigh = LatteReadCMD(); // unused
	uint32 sizeInDWords = LatteReadCMD();
	uint32 displayListSize = sizeInDWords * 4;
	DrawPassContext drawPassCtx;
	uint32 sizeInU32s = LatteReadCMD();

#ifdef LATTE_CP_LOGGING
	if (GetAsyncKeyState('A'))
		LatteCP_DebugPrintCmdBuffer(MEMPTR<uint32be>(physicalAddress), displayListSize);
#endif

	uint32be* buf = MEMPTR<uint32be>(physicalAddress).GetPtr();
	drawPassCtx.PushCurrentCommandQueuePos(buf, buf, buf + sizeInDWords);
	if (sizeInU32s > 0)
	{
		DrawPassContext drawPassCtx;
		uint32be* buf = MEMPTR<uint32be>(physicalAddress).GetPtr();
		drawPassCtx.PushCurrentCommandQueuePos(buf, buf, buf + sizeInU32s);

		LatteCP_processCommandBuffer(drawPassCtx);
		if (drawPassCtx.isWithinDrawPass())
			drawPassCtx.endDrawPass();
		LatteCP_processCommandBuffer(drawPassCtx);
		if (drawPassCtx.isWithinDrawPass())
			drawPassCtx.endDrawPass();
	}
}

// pushes the command buffer to the stack

@ -294,11 +241,12 @@ void LatteCP_itIndirectBuffer(LatteCMDPtr cmd, uint32 nWords, DrawPassContext& d
	uint32 physicalAddress = LatteReadCMD();
	uint32 physicalAddressHigh = LatteReadCMD(); // unused
	uint32 sizeInDWords = LatteReadCMD();
	uint32 displayListSize = sizeInDWords * 4;
	cemu_assert_debug(displayListSize >= 4);

	uint32be* buf = MEMPTR<uint32be>(physicalAddress).GetPtr();
	drawPassCtx.PushCurrentCommandQueuePos(buf, buf, buf + sizeInDWords);
	if (sizeInDWords > 0)
	{
		uint32 displayListSize = sizeInDWords * 4;
		uint32be* buf = MEMPTR<uint32be>(physicalAddress).GetPtr();
		drawPassCtx.PushCurrentCommandQueuePos(buf, buf, buf + sizeInDWords);
	}
}

LatteCMDPtr LatteCP_itStreamoutBufferUpdate(LatteCMDPtr cmd, uint32 nWords)

@ -565,26 +513,55 @@ LatteCMDPtr LatteCP_itMemWrite(LatteCMDPtr cmd, uint32 nWords)
	if (word1 == 0x40000)
	{
		// write U32
		*memPtr = word2;
		stdx::atomic_ref<uint32be> atomicRef(*memPtr);
		atomicRef.store(word2);
	}
	else if (word1 == 0x00000)
	{
		// write U64 (as two U32)
		// note: The U32s are swapped
		memPtr[0] = word2;
		memPtr[1] = word3;
		// write U64
		// note: The U32s are swapped here, but this needs verification. Also, it seems like the two U32 halves are written independently and the U64 as a whole is not atomic -> investigate
		stdx::atomic_ref<uint64be> atomicRef(*(uint64be*)memPtr);
		atomicRef.store(((uint64le)word2 << 32) | word3);
	}
	else if (word1 == 0x20000)
	{
		// write U64 (little endian)
		memPtr[0] = _swapEndianU32(word2);
		memPtr[1] = _swapEndianU32(word3);
		stdx::atomic_ref<uint64le> atomicRef(*(uint64le*)memPtr);
		atomicRef.store(((uint64le)word3 << 32) | word2);
	}
	else
		cemu_assert_unimplemented();
	return cmd;
}
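The IT_MEM_WRITE rework above replaces plain stores through memPtr with stores through stdx::atomic_ref, so a concurrent reader on the emulated CPU side can never observe a torn value. As a rough sketch of the same idea in standard C++20 (std::atomic_ref rather than Cemu's stdx wrapper, and ignoring the big-endian value types), assuming the target address is suitably aligned for a 64-bit atomic access:

#include <atomic>
#include <cstdint>

// Store a 64-bit value into plainly-typed memory in one indivisible write.
// memPtr is assumed to be 8-byte aligned; lo/hi are the two command words.
void writeTimestampAtomic(uint64_t* memPtr, uint32_t lo, uint32_t hi)
{
    const uint64_t value = (static_cast<uint64_t>(hi) << 32) | lo;
    std::atomic_ref<uint64_t> ref(*memPtr);
    ref.store(value, std::memory_order_relaxed); // single 8-byte store, never torn
}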

LatteCMDPtr LatteCP_itEventWriteEOP(LatteCMDPtr cmd, uint32 nWords)
{
	cemu_assert_debug(nWords == 5);
	uint32 word0 = LatteReadCMD();
	uint32 word1 = LatteReadCMD();
	uint32 word2 = LatteReadCMD();
	uint32 word3 = LatteReadCMD(); // value low bits
	uint32 word4 = LatteReadCMD(); // value high bits

	cemu_assert_debug(word2 == 0x40000000 || word2 == 0x42000000);

	if (word0 == 0x504 && (word2 & 0x40000000)) // todo - figure out the flags
	{
		stdx::atomic_ref<uint64be> atomicRef(*(uint64be*)memory_getPointerFromPhysicalOffset(word1));
		uint64 val = ((uint64)word4 << 32) | word3;
		atomicRef.store(val);
	}
	else
	{
		cemu_assert_unimplemented();
	}
	bool triggerInterrupt = (word2 & 0x2000000) != 0;
	if (triggerInterrupt)
	{
		// todo - timestamp interrupt
	}
	TCL::TCLGPUNotifyNewRetirementTimestamp();
	return cmd;
}

LatteCMDPtr LatteCP_itMemSemaphore(LatteCMDPtr cmd, uint32 nWords)
{

@ -783,16 +760,6 @@ LatteCMDPtr LatteCP_itDrawImmediate(LatteCMDPtr cmd, uint32 nWords, DrawPassCont

	drawPassCtx.executeDraw(count, false, _tempIndexArrayMPTR);
	return cmd;
}

LatteCMDPtr LatteCP_itHLEFifoWrapAround(LatteCMDPtr cmd, uint32 nWords)
{
	cemu_assert_debug(nWords == 1);
	uint32 unused = LatteReadCMD();
	gxRingBufferReadPtr = gx2WriteGatherPipe.gxRingBuffer;
	cmd = (LatteCMDPtr)gxRingBufferReadPtr;
	return cmd;
}

LatteCMDPtr LatteCP_itHLESampleTimer(LatteCMDPtr cmd, uint32 nWords)

@ -819,16 +786,6 @@ LatteCMDPtr LatteCP_itHLESpecialState(LatteCMDPtr cmd, uint32 nWords)
	return cmd;
}

LatteCMDPtr LatteCP_itHLESetRetirementTimestamp(LatteCMDPtr cmd, uint32 nWords)
{
	cemu_assert_debug(nWords == 2);
	uint32 timestampHigh = (uint32)LatteReadCMD();
	uint32 timestampLow = (uint32)LatteReadCMD();
	uint64 timestamp = ((uint64)timestampHigh << 32ULL) | (uint64)timestampLow;
	GX2::__GX2NotifyNewRetirementTimestamp(timestamp);
	return cmd;
}

LatteCMDPtr LatteCP_itHLEBeginOcclusionQuery(LatteCMDPtr cmd, uint32 nWords)
{
	cemu_assert_debug(nWords == 1);

@ -1145,9 +1102,10 @@ void LatteCP_processCommandBuffer(DrawPassContext& drawPassCtx)
	LatteCMDPtr cmd, cmdStart, cmdEnd;
	if (!drawPassCtx.PopCurrentCommandQueuePos(cmd, cmdStart, cmdEnd))
		break;
	uint32 itHeader;
	while (cmd < cmdEnd)
	{
		uint32 itHeader = LatteReadCMD();
		itHeader = LatteReadCMD();
		uint32 itHeaderType = (itHeader >> 30) & 3;
		if (itHeaderType == 3)
		{

@ -1361,11 +1319,6 @@ void LatteCP_processCommandBuffer(DrawPassContext& drawPassCtx)
	LatteCP_itHLEEndOcclusionQuery(cmdData, nWords);
	break;
}
case IT_HLE_SET_CB_RETIREMENT_TIMESTAMP:
{
	LatteCP_itHLESetRetirementTimestamp(cmdData, nWords);
	break;
}
case IT_HLE_BOTTOM_OF_PIPE_CB:
{
	LatteCP_itHLEBottomOfPipeCB(cmdData, nWords);

@ -1421,6 +1374,7 @@ void LatteCP_processCommandBuffer(DrawPassContext& drawPassCtx)
void LatteCP_ProcessRingbuffer()
{
	sint32 timerRecheck = 0; // estimates how much CP processing time has elapsed based on the executed commands, if the value exceeds CP_TIMER_RECHECK then _handleTimers() is called
	uint32be tmpBuffer[128];
	while (true)
	{
		uint32 itHeader = LatteCP_readU32Deprc();

@ -1429,10 +1383,13 @@ void LatteCP_ProcessRingbuffer()
		{
			uint32 itCode = (itHeader >> 8) & 0xFF;
			uint32 nWords = ((itHeader >> 16) & 0x3FFF) + 1;
			LatteCP_waitForNWords(nWords);
			LatteCMDPtr cmd = (LatteCMDPtr)gxRingBufferReadPtr;
			uint8* cmdEnd = gxRingBufferReadPtr + nWords * 4;
			gxRingBufferReadPtr = cmdEnd;
			cemu_assert(nWords < 128);
			for (sint32 i = 0; i < nWords; i++)
			{
				uint32 word = LatteCP_readU32Deprc();
				tmpBuffer[i] = word;
			}
			LatteCMDPtr cmd = (LatteCMDPtr)tmpBuffer;
			switch (itCode)
			{
			case IT_SURFACE_SYNC:

@ -1599,6 +1556,11 @@ void LatteCP_ProcessRingbuffer()
	timerRecheck += CP_TIMER_RECHECK / 512;
	break;
}
case IT_EVENT_WRITE_EOP:
{
	LatteCP_itEventWriteEOP(cmd, nWords);
	break;
}
case IT_HLE_COPY_COLORBUFFER_TO_SCANBUFFER:
{
	LatteCP_itHLECopyColorBufferToScanBuffer(cmd, nWords);

@ -1637,12 +1599,6 @@ void LatteCP_ProcessRingbuffer()
	timerRecheck += CP_TIMER_RECHECK / 128;
	break;
}
case IT_HLE_FIFO_WRAP_AROUND:
{
	LatteCP_itHLEFifoWrapAround(cmd, nWords);
	timerRecheck += CP_TIMER_RECHECK / 512;
	break;
}
case IT_HLE_SAMPLE_TIMER:
{
	LatteCP_itHLESampleTimer(cmd, nWords);

@ -1667,12 +1623,6 @@ void LatteCP_ProcessRingbuffer()
	timerRecheck += CP_TIMER_RECHECK / 512;
	break;
}
case IT_HLE_SET_CB_RETIREMENT_TIMESTAMP:
{
	LatteCP_itHLESetRetirementTimestamp(cmd, nWords);
	timerRecheck += CP_TIMER_RECHECK / 512;
	break;
}
case IT_HLE_BOTTOM_OF_PIPE_CB:
{
	LatteCP_itHLEBottomOfPipeCB(cmd, nWords);

@ -1933,11 +1883,6 @@ void LatteCP_DebugPrintCmdBuffer(uint32be* bufferPtr, uint32 size)
	cemuLog_log(LogType::Force, "{} IT_HLE_COPY_SURFACE_NEW", strPrefix);
	break;
}
case IT_HLE_FIFO_WRAP_AROUND:
{
	cemuLog_log(LogType::Force, "{} IT_HLE_FIFO_WRAP_AROUND", strPrefix);
	break;
}
case IT_HLE_SAMPLE_TIMER:
{
	cemuLog_log(LogType::Force, "{} IT_HLE_SAMPLE_TIMER", strPrefix);

@ -1958,11 +1903,6 @@ void LatteCP_DebugPrintCmdBuffer(uint32be* bufferPtr, uint32 size)
	cemuLog_log(LogType::Force, "{} IT_HLE_END_OCCLUSION_QUERY", strPrefix);
	break;
}
case IT_HLE_SET_CB_RETIREMENT_TIMESTAMP:
{
	cemuLog_log(LogType::Force, "{} IT_HLE_SET_CB_RETIREMENT_TIMESTAMP", strPrefix);
	break;
}
case IT_HLE_BOTTOM_OF_PIPE_CB:
{
	cemuLog_log(LogType::Force, "{} IT_HLE_BOTTOM_OF_PIPE_CB", strPrefix);
@ -6,6 +6,8 @@

#if defined(ARCH_X86_64) && defined(__GNUC__)
#include <immintrin.h>
#elif defined(__aarch64__)
#include <arm_neon.h>
#endif

struct

@ -502,6 +504,114 @@ void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDat
	indexMax = std::max(indexMax, _maxIndex);
	indexMin = std::min(indexMin, _minIndex);
}
#elif defined(__aarch64__)

void LatteIndices_fastConvertU16_NEON(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
{
	const uint16* indicesU16BE = (const uint16*)indexDataInput;
	uint16* indexOutput = (uint16*)indexDataOutput;
	sint32 count8 = count >> 3;
	sint32 countRemaining = count & 7;

	if (count8)
	{
		uint16x8_t mMin = vdupq_n_u16(0xFFFF);
		uint16x8_t mMax = vdupq_n_u16(0x0000);
		uint16x8_t mTemp;
		uint16x8_t* mRawIndices = (uint16x8_t*)indicesU16BE;
		indicesU16BE += count8 * 8;
		uint16x8_t* mOutputIndices = (uint16x8_t*)indexOutput;
		indexOutput += count8 * 8;

		while (count8--)
		{
			mTemp = vld1q_u16((uint16*)mRawIndices);
			mRawIndices++;
			mTemp = vrev16q_u8(mTemp);
			mMin = vminq_u16(mMin, mTemp);
			mMax = vmaxq_u16(mMax, mTemp);
			vst1q_u16((uint16*)mOutputIndices, mTemp);
			mOutputIndices++;
		}

		uint16* mMaxU16 = (uint16*)&mMax;
		uint16* mMinU16 = (uint16*)&mMin;

		for (int i = 0; i < 8; ++i) {
			indexMax = std::max(indexMax, (uint32)mMaxU16[i]);
			indexMin = std::min(indexMin, (uint32)mMinU16[i]);
		}
	}
	// process remaining indices
	uint32 _minIndex = 0xFFFFFFFF;
	uint32 _maxIndex = 0;
	for (sint32 i = countRemaining; (--i) >= 0;)
	{
		uint16 idx = _swapEndianU16(*indicesU16BE);
		*indexOutput = idx;
		indexOutput++;
		indicesU16BE++;
		_maxIndex = std::max(_maxIndex, (uint32)idx);
		_minIndex = std::min(_minIndex, (uint32)idx);
	}
	// update min/max
	indexMax = std::max(indexMax, _maxIndex);
	indexMin = std::min(indexMin, _minIndex);
}
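One note on the byte-swap step above: vrev16q_u8 reverses the two bytes inside each 16-bit lane, which is exactly a big-endian to native-order conversion for u16 indices, but the intrinsic is typed for uint8x16_t. A strictly conforming standalone version of that step reinterprets the vector around the reversal, as in this sketch (the helper name is illustrative, not part of the diff):

#include <arm_neon.h>

// Convert eight big-endian u16 values to native order in one instruction
// by reversing the byte pair inside every 16-bit lane.
static inline uint16x8_t swapU16LanesBE(uint16x8_t v)
{
    return vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(v)));
}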

void LatteIndices_fastConvertU32_NEON(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
{
	const uint32* indicesU32BE = (const uint32*)indexDataInput;
	uint32* indexOutput = (uint32*)indexDataOutput;
	sint32 count8 = count >> 2;
	sint32 countRemaining = count & 3;

	if (count8)
	{
		uint32x4_t mMin = vdupq_n_u32(0xFFFFFFFF);
		uint32x4_t mMax = vdupq_n_u32(0x00000000);
		uint32x4_t mTemp;
		uint32x4_t* mRawIndices = (uint32x4_t*)indicesU32BE;
		indicesU32BE += count8 * 4;
		uint32x4_t* mOutputIndices = (uint32x4_t*)indexOutput;
		indexOutput += count8 * 4;

		while (count8--)
		{
			mTemp = vld1q_u32((uint32*)mRawIndices);
			mRawIndices++;
			mTemp = vrev32q_u8(mTemp);
			mMin = vminq_u32(mMin, mTemp);
			mMax = vmaxq_u32(mMax, mTemp);
			vst1q_u32((uint32*)mOutputIndices, mTemp);
			mOutputIndices++;
		}

		uint32* mMaxU32 = (uint32*)&mMax;
		uint32* mMinU32 = (uint32*)&mMin;

		for (int i = 0; i < 4; ++i) {
			indexMax = std::max(indexMax, mMaxU32[i]);
			indexMin = std::min(indexMin, mMinU32[i]);
		}
	}
	// process remaining indices
	uint32 _minIndex = 0xFFFFFFFF;
	uint32 _maxIndex = 0;
	for (sint32 i = countRemaining; (--i) >= 0;)
	{
		uint32 idx = _swapEndianU32(*indicesU32BE);
		*indexOutput = idx;
		indexOutput++;
		indicesU32BE++;
		_maxIndex = std::max(_maxIndex, idx);
		_minIndex = std::min(_minIndex, idx);
	}
	// update min/max
	indexMax = std::max(indexMax, _maxIndex);
	indexMin = std::min(indexMin, _minIndex);
}

#endif

template<typename T>

@ -688,27 +798,31 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32
{
	if (indexType == LatteIndexType::U16_BE)
	{
#if defined(ARCH_X86_64)
#if defined(ARCH_X86_64)
		if (g_CPUFeatures.x86.avx2)
			LatteIndices_fastConvertU16_AVX2(indexData, indexOutputPtr, count, indexMin, indexMax);
		else if (g_CPUFeatures.x86.sse4_1 && g_CPUFeatures.x86.ssse3)
			LatteIndices_fastConvertU16_SSE41(indexData, indexOutputPtr, count, indexMin, indexMax);
		else
			LatteIndices_convertBE<uint16>(indexData, indexOutputPtr, count, indexMin, indexMax);
#else
#elif defined(__aarch64__)
		LatteIndices_fastConvertU16_NEON(indexData, indexOutputPtr, count, indexMin, indexMax);
#else
		LatteIndices_convertBE<uint16>(indexData, indexOutputPtr, count, indexMin, indexMax);
#endif
#endif
	}
	else if (indexType == LatteIndexType::U32_BE)
	{
#if defined(ARCH_X86_64)
#if defined(ARCH_X86_64)
		if (g_CPUFeatures.x86.avx2)
			LatteIndices_fastConvertU32_AVX2(indexData, indexOutputPtr, count, indexMin, indexMax);
		else
			LatteIndices_convertBE<uint32>(indexData, indexOutputPtr, count, indexMin, indexMax);
#else
#elif defined(__aarch64__)
		LatteIndices_fastConvertU32_NEON(indexData, indexOutputPtr, count, indexMin, indexMax);
#else
		LatteIndices_convertBE<uint32>(indexData, indexOutputPtr, count, indexMin, indexMax);
#endif
#endif
	}
	else if (indexType == LatteIndexType::U16_LE)
	{
@ -14,6 +14,7 @@
#define IT_MEM_WRITE 0x3D
#define IT_SURFACE_SYNC 0x43
#define IT_EVENT_WRITE 0x46
#define IT_EVENT_WRITE_EOP 0x47 // end of pipe

#define IT_LOAD_CONFIG_REG 0x60
#define IT_LOAD_CONTEXT_REG 0x61

@ -47,14 +48,12 @@
#define IT_HLE_WAIT_FOR_FLIP 0xF1
#define IT_HLE_BOTTOM_OF_PIPE_CB 0xF2
#define IT_HLE_COPY_COLORBUFFER_TO_SCANBUFFER 0xF3
#define IT_HLE_FIFO_WRAP_AROUND 0xF4
#define IT_HLE_CLEAR_COLOR_DEPTH_STENCIL 0xF5
#define IT_HLE_SAMPLE_TIMER 0xF7
#define IT_HLE_TRIGGER_SCANBUFFER_SWAP 0xF8
#define IT_HLE_SPECIAL_STATE 0xF9
#define IT_HLE_BEGIN_OCCLUSION_QUERY 0xFA
#define IT_HLE_END_OCCLUSION_QUERY 0xFB
#define IT_HLE_SET_CB_RETIREMENT_TIMESTAMP 0xFD

#define pm4HeaderType3(__itCode, __dataDWordCount) (0xC0000000|((uint32)(__itCode)<<8)|((uint32)((__dataDWordCount)-1)<<16))
#define pm4HeaderType2Filler() (0x80000000)
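The pm4HeaderType3 macro above is the exact inverse of the header decode used by the command processor earlier in this diff (packet type in bits 31-30, opcode in bits 15-8, dword count minus one in bits 29-16). A small self-contained sanity check of that packing, written against the same bit layout:

#include <cassert>
#include <cstdint>

// Mirrors pm4HeaderType3: type-3 packet with opcode and payload size.
constexpr uint32_t makeType3Header(uint32_t itCode, uint32_t dataDWordCount)
{
    return 0xC0000000u | (itCode << 8) | ((dataDWordCount - 1u) << 16);
}

int main()
{
    const uint32_t hdr = makeType3Header(0x47 /* IT_EVENT_WRITE_EOP */, 5);
    assert(((hdr >> 30) & 3) == 3);            // itHeaderType == 3
    assert(((hdr >> 8) & 0xFF) == 0x47);       // itCode
    assert((((hdr >> 16) & 0x3FFF) + 1) == 5); // nWords
    return 0;
}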
@ -209,7 +209,7 @@ class BootSoundPlayer

try
{
	bootSndAudioDev = IAudioAPI::CreateDeviceFromConfig(true, sampleRate, nChannels, samplesPerBlock, bitsPerSample);
	bootSndAudioDev = IAudioAPI::CreateDeviceFromConfig(IAudioAPI::AudioType::TV, sampleRate, nChannels, samplesPerBlock, bitsPerSample);
	if (!bootSndAudioDev)
		return;
}

@ -207,7 +207,6 @@ int Latte_ThreadEntry()
	if (Latte_GetStopSignal())
		LatteThread_Exit();
}
gxRingBufferReadPtr = gx2WriteGatherPipe.gxRingBuffer;
LatteCP_ProcessRingbuffer();
cemu_assert_debug(false); // should never reach
return 0;

@ -235,6 +234,8 @@ void Latte_Start()
void Latte_Stop()
{
	std::unique_lock _lock(sLatteThreadStateMutex);
	if (!sLatteThreadRunning)
		return;
	sLatteThreadRunning = false;
	_lock.unlock();
	sLatteThread.join();
@ -187,8 +187,8 @@ std::string RendererOutputShader::GetOpenGlVertexSource(bool render_upside_down)
// vertex shader
std::ostringstream vertex_source;
vertex_source <<
R"(#version 400
out vec2 passUV;
R"(#version 420
layout(location = 0) smooth out vec2 passUV;

out gl_PerVertex
{

@ -297,7 +297,7 @@ uniform vec2 nativeResolution;
uniform vec2 outputResolution;
#endif

layout(location = 0) in vec2 passUV;
layout(location = 0) smooth in vec2 passUV;
layout(binding = 0) uniform sampler2D textureSrc;
layout(location = 0) out vec4 colorOut0;
)" + shaderSrc;
@ -221,11 +221,14 @@
VKRObjectPipeline();
~VKRObjectPipeline() override;

void setPipeline(VkPipeline newPipeline);
void SetPipeline(VkPipeline newPipeline);
VkPipeline GetPipeline() const { return m_pipeline; }

VkPipeline pipeline = VK_NULL_HANDLE;
VkDescriptorSetLayout vertexDSL = VK_NULL_HANDLE, pixelDSL = VK_NULL_HANDLE, geometryDSL = VK_NULL_HANDLE;
VkPipelineLayout pipeline_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_vertexDSL = VK_NULL_HANDLE, m_pixelDSL = VK_NULL_HANDLE, m_geometryDSL = VK_NULL_HANDLE;
VkPipelineLayout m_pipelineLayout = VK_NULL_HANDLE;

private:
VkPipeline m_pipeline = VK_NULL_HANDLE;
};

class VKRObjectDescriptorSet : public VKRDestructibleObject
@ -26,7 +26,6 @@ PipelineInfo::PipelineInfo(uint64 minimalStateHash, uint64 pipelineHash, LatteFe

// init VKRObjPipeline
m_vkrObjPipeline = new VKRObjectPipeline();
m_vkrObjPipeline->pipeline = VK_NULL_HANDLE;

// track dependency with shaders
if (vertexShaderVk)
@ -558,8 +558,8 @@ void PipelineCompiler::InitRasterizerState(const LatteContextRegister& latteRegi
rasterizerExt.flags = 0;

rasterizer.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
rasterizer.pNext = &rasterizerExt;
rasterizer.rasterizerDiscardEnable = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
rasterizer.pNext = VulkanRenderer::GetInstance()->m_featureControl.deviceExtensions.depth_clip_enable ? &rasterizerExt : nullptr;
// GX2SetSpecialState(0, true) workaround
if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA())
	rasterizer.rasterizerDiscardEnable = false;

@ -730,7 +730,7 @@ void PipelineCompiler::InitDescriptorSetLayouts(VulkanRenderer* vkRenderer, Pipe
{
	cemu_assert_debug(descriptorSetLayoutCount == 0);
	CreateDescriptorSetLayout(vkRenderer, vertexShader, descriptorSetLayout[descriptorSetLayoutCount], vkrPipelineInfo);
	vkObjPipeline->vertexDSL = descriptorSetLayout[descriptorSetLayoutCount];
	vkObjPipeline->m_vertexDSL = descriptorSetLayout[descriptorSetLayoutCount];
	descriptorSetLayoutCount++;
}

@ -738,7 +738,7 @@ void PipelineCompiler::InitDescriptorSetLayouts(VulkanRenderer* vkRenderer, Pipe
{
	cemu_assert_debug(descriptorSetLayoutCount == 1);
	CreateDescriptorSetLayout(vkRenderer, pixelShader, descriptorSetLayout[descriptorSetLayoutCount], vkrPipelineInfo);
	vkObjPipeline->pixelDSL = descriptorSetLayout[descriptorSetLayoutCount];
	vkObjPipeline->m_pixelDSL = descriptorSetLayout[descriptorSetLayoutCount];
	descriptorSetLayoutCount++;
}
else if (geometryShader)

@ -757,7 +757,7 @@ void PipelineCompiler::InitDescriptorSetLayouts(VulkanRenderer* vkRenderer, Pipe
{
	cemu_assert_debug(descriptorSetLayoutCount == 2);
	CreateDescriptorSetLayout(vkRenderer, geometryShader, descriptorSetLayout[descriptorSetLayoutCount], vkrPipelineInfo);
	vkObjPipeline->geometryDSL = descriptorSetLayout[descriptorSetLayoutCount];
	vkObjPipeline->m_geometryDSL = descriptorSetLayout[descriptorSetLayoutCount];
	descriptorSetLayoutCount++;
}
}

@ -873,7 +873,7 @@ void PipelineCompiler::InitDynamicState(PipelineInfo* pipelineInfo, bool usesBle
dynamicState.pDynamicStates = dynamicStates.data();
}

bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const LatteContextRegister& latteRegister, VKRObjectRenderPass* renderPassObj)
bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const LatteContextRegister& latteRegister, VKRObjectRenderPass* renderPassObj, bool requireRobustBufferAccess)
{
	VulkanRenderer* vkRenderer = VulkanRenderer::GetInstance();

@ -888,6 +888,7 @@ bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const
m_vkGeometryShader = pipelineInfo->geometryShaderVk;
m_vkrObjPipeline = pipelineInfo->m_vkrObjPipeline;
m_renderPassObj = renderPassObj;
m_requestRobustBufferAccess = requireRobustBufferAccess;

// if required generate RECT emulation geometry shader
if (!vkRenderer->m_featureControl.deviceExtensions.nv_fill_rectangle && isPrimitiveRect)

@ -918,7 +919,7 @@ bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const
pipelineLayoutInfo.pPushConstantRanges = nullptr;
pipelineLayoutInfo.pushConstantRangeCount = 0;

VkResult result = vkCreatePipelineLayout(vkRenderer->m_logicalDevice, &pipelineLayoutInfo, nullptr, &m_pipeline_layout);
VkResult result = vkCreatePipelineLayout(vkRenderer->m_logicalDevice, &pipelineLayoutInfo, nullptr, &m_pipelineLayout);
if (result != VK_SUCCESS)
{
	cemuLog_log(LogType::Force, "Failed to create pipeline layout: {}", result);

@ -936,7 +937,7 @@ bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const

// ##########################################################################################################################################

pipelineInfo->m_vkrObjPipeline->pipeline_layout = m_pipeline_layout;
pipelineInfo->m_vkrObjPipeline->m_pipelineLayout = m_pipelineLayout;

// increment ref counter for vkrObjPipeline and renderpass object to make sure they don't get released while we are using them
m_vkrObjPipeline->incRef();

@ -989,7 +990,7 @@ bool PipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool show
pipelineInfo.pRasterizationState = &rasterizer;
pipelineInfo.pMultisampleState = &multisampling;
pipelineInfo.pColorBlendState = &colorBlending;
pipelineInfo.layout = m_pipeline_layout;
pipelineInfo.layout = m_pipelineLayout;
pipelineInfo.renderPass = m_renderPassObj->m_renderPass;
pipelineInfo.pDepthStencilState = &depthStencilState;
pipelineInfo.subpass = 0;

@ -998,6 +999,8 @@ bool PipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool show
if (!forceCompile)
	pipelineInfo.flags |= VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT;

void* prevStruct = nullptr;

VkPipelineCreationFeedbackCreateInfoEXT creationFeedbackInfo;
VkPipelineCreationFeedbackEXT creationFeedback;
std::vector<VkPipelineCreationFeedbackEXT> creationStageFeedback(0);

@ -1015,9 +1018,25 @@ bool PipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool show
creationFeedbackInfo.pPipelineCreationFeedback = &creationFeedback;
creationFeedbackInfo.pPipelineStageCreationFeedbacks = creationStageFeedback.data();
creationFeedbackInfo.pipelineStageCreationFeedbackCount = pipelineInfo.stageCount;
pipelineInfo.pNext = &creationFeedbackInfo;
creationFeedbackInfo.pNext = prevStruct;
prevStruct = &creationFeedbackInfo;
}

VkPipelineRobustnessCreateInfoEXT pipelineRobustnessCreateInfo{};
if (vkRenderer->m_featureControl.deviceExtensions.pipeline_robustness && m_requestRobustBufferAccess)
{
	// per-pipeline handling of robust buffer access, if the extension is not available then we fall back to device feature robustBufferAccess
	pipelineRobustnessCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_ROBUSTNESS_CREATE_INFO_EXT;
	pipelineRobustnessCreateInfo.pNext = prevStruct;
	prevStruct = &pipelineRobustnessCreateInfo;
	pipelineRobustnessCreateInfo.storageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
	pipelineRobustnessCreateInfo.uniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
	pipelineRobustnessCreateInfo.vertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT;
	pipelineRobustnessCreateInfo.images = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DEVICE_DEFAULT_EXT;
}

pipelineInfo.pNext = prevStruct;

VkPipeline pipeline = VK_NULL_HANDLE;
VkResult result;
uint8 retryCount = 0;
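With a second optional extension struct in play, the fixed pipelineInfo.pNext assignment becomes a chain built back-to-front through prevStruct, as the hunk above shows. The pattern in isolation looks like this sketch (function and parameter names are illustrative, not Cemu's):

#include <vulkan/vulkan.h>

// Chain optional extension structs onto a pipeline create info.
// Each present struct points at whatever was chained before it, so any
// combination of features can be enabled without special-casing.
void buildPNextChain(VkGraphicsPipelineCreateInfo& createInfo,
                     VkPipelineCreationFeedbackCreateInfoEXT* feedback,  // may be nullptr
                     VkPipelineRobustnessCreateInfoEXT* robustness)      // may be nullptr
{
    const void* chainHead = nullptr;
    if (feedback)
    {
        feedback->pNext = chainHead;
        chainHead = feedback;
    }
    if (robustness)
    {
        robustness->pNext = chainHead;
        chainHead = robustness;
    }
    createInfo.pNext = chainHead;
}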
@ -1037,7 +1056,7 @@ bool PipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool show
|
|||
}
|
||||
else if (result == VK_SUCCESS)
|
||||
{
|
||||
m_vkrObjPipeline->setPipeline(pipeline);
|
||||
m_vkrObjPipeline->SetPipeline(pipeline);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@@ -1075,3 +1094,31 @@ void PipelineCompiler::TrackAsCached(uint64 baseHash, uint64 pipelineStateHash)
      return;
  pipelineCache.AddCurrentStateToCache(baseHash, pipelineStateHash);
  }
+
+ // calculate whether the pipeline requires robust buffer access
+ // if there is a potential risk for a shader to do out-of-bounds reads or writes we need to enable robust buffer access
+ // this can happen when:
+ // - Streamout is used with too small of a buffer (probably? Could also be some issue with how the streamout array index is calculated -> We can maybe fix this in the future)
+ // - The shader uses dynamic indices for uniform access. This will trigger the uniform mode to be FULL_CBANK
+ bool PipelineCompiler::CalcRobustBufferAccessRequirement(LatteDecompilerShader* vertexShader, LatteDecompilerShader* pixelShader, LatteDecompilerShader* geometryShader)
+ {
+     bool requiresRobustBufferAccess = false;
+     if (vertexShader)
+     {
+         cemu_assert_debug(vertexShader->shaderType == LatteConst::ShaderType::Vertex);
+         requiresRobustBufferAccess |= vertexShader->hasStreamoutBufferWrite;
+         requiresRobustBufferAccess |= vertexShader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK;
+     }
+     if (geometryShader)
+     {
+         cemu_assert_debug(geometryShader->shaderType == LatteConst::ShaderType::Geometry);
+         requiresRobustBufferAccess |= geometryShader->hasStreamoutBufferWrite;
+         requiresRobustBufferAccess |= geometryShader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK;
+     }
+     if (pixelShader)
+     {
+         cemu_assert_debug(pixelShader->shaderType == LatteConst::ShaderType::Pixel);
+         requiresRobustBufferAccess |= pixelShader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK;
+     }
+     return requiresRobustBufferAccess;
+ }

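The function above simply ORs per-stage risk flags: any stage that can read or write past a buffer's end forces robust access for the whole pipeline. A condensed sketch of the same accumulation, with a hypothetical StageInfo struct standing in for LatteDecompilerShader:

#include <initializer_list>

struct StageInfo // hypothetical stand-in for the decompiled shader info
{
    bool hasStreamoutBufferWrite;    // may write past the end of the streamout buffer
    bool usesDynamicUniformIndexing; // corresponds to the FULL_CBANK uniform mode
};

// One risky stage is enough; the flag applies to the whole pipeline.
bool NeedsRobustBufferAccess(const StageInfo* vs, const StageInfo* gs, const StageInfo* ps)
{
    bool required = false;
    for (const StageInfo* stage : {vs, gs, ps})
    {
        if (!stage)
            continue;
        required |= stage->hasStreamoutBufferWrite;
        required |= stage->usesDynamicUniformIndexing;
    }
    return required;
}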
@@ -38,11 +38,14 @@ public:
  RendererShaderVk* m_vkPixelShader{};
  RendererShaderVk* m_vkGeometryShader{};

- bool InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const LatteContextRegister& latteRegister, VKRObjectRenderPass* renderPassObj);
+ bool InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const LatteContextRegister& latteRegister, VKRObjectRenderPass* renderPassObj, bool requireRobustBufferAccess);
  void TrackAsCached(uint64 baseHash, uint64 pipelineStateHash); // stores pipeline to permanent cache if not yet cached. Must be called synchronously from render thread due to dependency on GPU state

- VkPipelineLayout m_pipeline_layout;
+ static bool CalcRobustBufferAccessRequirement(LatteDecompilerShader* vertexShader, LatteDecompilerShader* pixelShader, LatteDecompilerShader* geometryShader);
+
+ VkPipelineLayout m_pipelineLayout;
  VKRObjectRenderPass* m_renderPassObj{};
+ bool m_requestRobustBufferAccess{false};

  /* shader stages */
  std::vector<VkPipelineShaderStageCreateInfo> shaderStages;

@@ -277,8 +277,9 @@ void VulkanPipelineStableCache::LoadPipelineFromCache(std::span<uint8> fileData)
  m_pipelineIsCachedLock.unlock();
  // compile
  {
-     PipelineCompiler pp;
-     if (!pp.InitFromCurrentGPUState(pipelineInfo, *lcr, renderPass))
+     PipelineCompiler pipelineCompiler;
+     bool requiresRobustBufferAccess = PipelineCompiler::CalcRobustBufferAccessRequirement(vertexShader, pixelShader, geometryShader);
+     if (!pipelineCompiler.InitFromCurrentGPUState(pipelineInfo, *lcr, renderPass, requiresRobustBufferAccess))
      {
          s_spinlockSharedInternal.lock();
          delete lcr;
@@ -286,8 +287,7 @@ void VulkanPipelineStableCache::LoadPipelineFromCache(std::span<uint8> fileData)
          s_spinlockSharedInternal.unlock();
          return;
      }
-     pp.Compile(true, true, false);
-     // destroy pp early
+     pipelineCompiler.Compile(true, true, false);
  }
  // on success, calculate pipeline hash and flag as present in cache
  uint64 pipelineBaseHash = vertexShader->baseHash;

@@ -49,7 +49,9 @@ const std::vector<const char*> kOptionalDeviceExtensions =
  VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,
  VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME,
  VK_KHR_PRESENT_WAIT_EXTENSION_NAME,
- VK_KHR_PRESENT_ID_EXTENSION_NAME
+ VK_KHR_PRESENT_ID_EXTENSION_NAME,
+ VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME,
+ VK_EXT_PIPELINE_ROBUSTNESS_EXTENSION_NAME
  };

  const std::vector<const char*> kRequiredDeviceExtensions =
@@ -82,8 +84,6 @@ VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback(VkDebugUtilsMessageSeverityFla
  if (strstr(pCallbackData->pMessage, "Number of currently valid sampler objects is not less than the maximum allowed"))
      return VK_FALSE;

- assert_dbg();
-
  #endif

  cemuLog_log(LogType::Force, (char*)pCallbackData->pMessage);
@@ -264,6 +264,14 @@ void VulkanRenderer::GetDeviceFeatures()
  pwf.pNext = prevStruct;
  prevStruct = &pwf;

+ VkPhysicalDevicePipelineRobustnessFeaturesEXT pprf{};
+ if (m_featureControl.deviceExtensions.pipeline_robustness)
+ {
+     pprf.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_ROBUSTNESS_FEATURES_EXT;
+     pprf.pNext = prevStruct;
+     prevStruct = &pprf;
+ }
+
  VkPhysicalDeviceFeatures2 physicalDeviceFeatures2{};
  physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
  physicalDeviceFeatures2.pNext = prevStruct;
@@ -314,7 +322,15 @@ void VulkanRenderer::GetDeviceFeatures()
      cemuLog_log(LogType::Force, "VK_EXT_custom_border_color not supported. Cannot emulate arbitrary border color");
  }
  }

+ if (!m_featureControl.deviceExtensions.depth_clip_enable)
+ {
+     cemuLog_log(LogType::Force, "VK_EXT_depth_clip_enable not supported");
+ }
+ if (m_featureControl.deviceExtensions.pipeline_robustness)
+ {
+     if (pprf.pipelineRobustness != VK_TRUE)
+         m_featureControl.deviceExtensions.pipeline_robustness = false;
+ }
  // get limits
  m_featureControl.limits.minUniformBufferOffsetAlignment = std::max(prop2.properties.limits.minUniformBufferOffsetAlignment, (VkDeviceSize)4);
  m_featureControl.limits.nonCoherentAtomSize = std::max(prop2.properties.limits.nonCoherentAtomSize, (VkDeviceSize)4);
@@ -473,11 +489,17 @@ VulkanRenderer::VulkanRenderer()
  deviceFeatures.occlusionQueryPrecise = VK_TRUE;
  deviceFeatures.depthClamp = VK_TRUE;
  deviceFeatures.depthBiasClamp = VK_TRUE;
- if (m_vendor == GfxVendor::AMD)
+
+ if (m_featureControl.deviceExtensions.pipeline_robustness)
  {
-     deviceFeatures.robustBufferAccess = VK_TRUE;
-     cemuLog_log(LogType::Force, "Enable robust buffer access");
+     deviceFeatures.robustBufferAccess = VK_FALSE;
  }
+ else
+ {
+     cemuLog_log(LogType::Force, "VK_EXT_pipeline_robustness not supported. Falling back to robustBufferAccess");
+     deviceFeatures.robustBufferAccess = VK_TRUE;
+ }

  if (m_featureControl.mode.useTFEmulationViaSSBO)
  {
      deviceFeatures.vertexPipelineStoresAndAtomics = true;
@@ -522,6 +544,15 @@ VulkanRenderer::VulkanRenderer()
      deviceExtensionFeatures = &presentWaitFeature;
      presentWaitFeature.presentWait = VK_TRUE;
  }
+ // enable VK_EXT_pipeline_robustness
+ VkPhysicalDevicePipelineRobustnessFeaturesEXT pipelineRobustnessFeature{};
+ if (m_featureControl.deviceExtensions.pipeline_robustness)
+ {
+     pipelineRobustnessFeature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_ROBUSTNESS_FEATURES_EXT;
+     pipelineRobustnessFeature.pNext = deviceExtensionFeatures;
+     deviceExtensionFeatures = &pipelineRobustnessFeature;
+     pipelineRobustnessFeature.pipelineRobustness = VK_TRUE;
+ }

  std::vector<const char*> used_extensions;
  VkDeviceCreateInfo createInfo = CreateDeviceCreateInfo(queueCreateInfos, deviceFeatures, deviceExtensionFeatures, used_extensions);
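The two halves above follow the usual VK_EXT_* feature protocol: chain the feature struct into vkGetPhysicalDeviceFeatures2 to see what the driver supports, then chain a struct with the desired booleans into VkDeviceCreateInfo at device creation. The query half in isolation — plain Vulkan API, only the helper name is illustrative:

#include <vulkan/vulkan.h>

// Returns true if the device can honor per-pipeline robustness control.
bool QueryPipelineRobustness(VkPhysicalDevice physicalDevice)
{
    VkPhysicalDevicePipelineRobustnessFeaturesEXT robustness{};
    robustness.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_ROBUSTNESS_FEATURES_EXT;

    VkPhysicalDeviceFeatures2 features2{};
    features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
    features2.pNext = &robustness;

    vkGetPhysicalDeviceFeatures2(physicalDevice, &features2); // driver fills in the chained struct
    return robustness.pipelineRobustness == VK_TRUE;
}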
@@ -1118,10 +1149,15 @@ VkDeviceCreateInfo VulkanRenderer::CreateDeviceCreateInfo(const std::vector<VkDe
      used_extensions.emplace_back(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME);
  if (m_featureControl.deviceExtensions.shader_float_controls)
      used_extensions.emplace_back(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
+ if (m_featureControl.deviceExtensions.depth_clip_enable)
+     used_extensions.emplace_back(VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME);
  if (m_featureControl.deviceExtensions.present_wait)
  {
      used_extensions.emplace_back(VK_KHR_PRESENT_ID_EXTENSION_NAME);
+     if (m_featureControl.deviceExtensions.present_wait)
          used_extensions.emplace_back(VK_KHR_PRESENT_WAIT_EXTENSION_NAME);
  }
+ if (m_featureControl.deviceExtensions.pipeline_robustness)
+     used_extensions.emplace_back(VK_EXT_PIPELINE_ROBUSTNESS_EXTENSION_NAME);

  VkDeviceCreateInfo createInfo{};
  createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
@@ -1218,6 +1254,8 @@ bool VulkanRenderer::CheckDeviceExtensionSupport(const VkPhysicalDevice device,
  info.deviceExtensions.synchronization2 = isExtensionAvailable(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
  info.deviceExtensions.shader_float_controls = isExtensionAvailable(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
  info.deviceExtensions.dynamic_rendering = false; // isExtensionAvailable(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME);
+ info.deviceExtensions.depth_clip_enable = isExtensionAvailable(VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME);
+ info.deviceExtensions.pipeline_robustness = isExtensionAvailable(VK_EXT_PIPELINE_ROBUSTNESS_EXTENSION_NAME);
  // dynamic rendering doesn't provide any benefits for us right now. Driver implementations are very unoptimized as of Feb 2022
  info.deviceExtensions.present_wait = isExtensionAvailable(VK_KHR_PRESENT_WAIT_EXTENSION_NAME) && isExtensionAvailable(VK_KHR_PRESENT_ID_EXTENSION_NAME);

@@ -1589,37 +1627,35 @@ void VulkanRenderer::DeleteNullObjects()

  void VulkanRenderer::ImguiInit()
  {
- if (m_imguiRenderPass == VK_NULL_HANDLE)
- {
-     // TODO: renderpass swapchain format may change between srgb and rgb -> need reinit
-     VkAttachmentDescription colorAttachment = {};
-     colorAttachment.format = m_mainSwapchainInfo->m_surfaceFormat.format;
-     colorAttachment.samples = VK_SAMPLE_COUNT_1_BIT;
-     colorAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
-     colorAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
-     colorAttachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
-     colorAttachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
-     colorAttachment.initialLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
-     colorAttachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
+ VkRenderPass prevRenderPass = m_imguiRenderPass;

-     VkAttachmentReference colorAttachmentRef = {};
-     colorAttachmentRef.attachment = 0;
-     colorAttachmentRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-     VkSubpassDescription subpass = {};
-     subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
-     subpass.colorAttachmentCount = 1;
-     subpass.pColorAttachments = &colorAttachmentRef;
+ VkAttachmentDescription colorAttachment = {};
+ colorAttachment.format = m_mainSwapchainInfo->m_surfaceFormat.format;
+ colorAttachment.samples = VK_SAMPLE_COUNT_1_BIT;
+ colorAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+ colorAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+ colorAttachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+ colorAttachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
+ colorAttachment.initialLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
+ colorAttachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;

-     VkRenderPassCreateInfo renderPassInfo = {};
-     renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
-     renderPassInfo.attachmentCount = 1;
-     renderPassInfo.pAttachments = &colorAttachment;
-     renderPassInfo.subpassCount = 1;
-     renderPassInfo.pSubpasses = &subpass;
-     const auto result = vkCreateRenderPass(m_logicalDevice, &renderPassInfo, nullptr, &m_imguiRenderPass);
-     if (result != VK_SUCCESS)
-         throw VkException(result, "can't create imgui renderpass");
- }
+ VkAttachmentReference colorAttachmentRef = {};
+ colorAttachmentRef.attachment = 0;
+ colorAttachmentRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+ VkSubpassDescription subpass = {};
+ subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
+ subpass.colorAttachmentCount = 1;
+ subpass.pColorAttachments = &colorAttachmentRef;

+ VkRenderPassCreateInfo renderPassInfo = {};
+ renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
+ renderPassInfo.attachmentCount = 1;
+ renderPassInfo.pAttachments = &colorAttachment;
+ renderPassInfo.subpassCount = 1;
+ renderPassInfo.pSubpasses = &subpass;
+ const auto result = vkCreateRenderPass(m_logicalDevice, &renderPassInfo, nullptr, &m_imguiRenderPass);
+ if (result != VK_SUCCESS)
+     throw VkException(result, "can't create imgui renderpass");

  ImGui_ImplVulkan_InitInfo info{};
  info.Instance = m_instance;
@@ -1633,6 +1669,9 @@ void VulkanRenderer::ImguiInit()
  info.ImageCount = info.MinImageCount;

  ImGui_ImplVulkan_Init(&info, m_imguiRenderPass);
+
+ if (prevRenderPass != VK_NULL_HANDLE)
+     vkDestroyRenderPass(GetLogicalDevice(), prevRenderPass, nullptr);
  }

  void VulkanRenderer::Initialize()
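The reworked ImguiInit() always creates a fresh render pass and only destroys the previous one after ImGui_ImplVulkan_Init has rebound to the new handle, which addresses the swapchain-format-change case the removed TODO pointed at. The pattern in isolation — a sketch where CreateRenderPass and ReinitConsumers are hypothetical stand-ins for the real calls:

#include <vulkan/vulkan.h>

VkRenderPass CreateRenderPass(VkDevice device); // hypothetical: builds the replacement pass
void ReinitConsumers(VkRenderPass renderPass);  // hypothetical: the rebinding step (ImGui_ImplVulkan_Init in the diff)

VkRenderPass g_renderPass = VK_NULL_HANDLE;

void RecreateRenderPass(VkDevice device)
{
    VkRenderPass prev = g_renderPass;        // remember the handle that may still be referenced
    g_renderPass = CreateRenderPass(device); // create the replacement first
    ReinitConsumers(g_renderPass);           // everything rebinds to the new pass
    if (prev != VK_NULL_HANDLE)
        vkDestroyRenderPass(device, prev, nullptr); // only now is the old handle unreachable
}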
@@ -1645,10 +1684,10 @@ void VulkanRenderer::Initialize()

  void VulkanRenderer::Shutdown()
  {
- DeleteFontTextures();
- Renderer::Shutdown();
  SubmitCommandBuffer();
  WaitDeviceIdle();
+ DeleteFontTextures();
+ Renderer::Shutdown();
  if (m_imguiRenderPass != VK_NULL_HANDLE)
  {
      vkDestroyRenderPass(m_logicalDevice, m_imguiRenderPass, nullptr);

@@ -4112,33 +4151,36 @@ VKRObjectFramebuffer::~VKRObjectFramebuffer()

  VKRObjectPipeline::VKRObjectPipeline()
  {
- // todo
  }

- void VKRObjectPipeline::setPipeline(VkPipeline newPipeline)
+ void VKRObjectPipeline::SetPipeline(VkPipeline newPipeline)
  {
- cemu_assert_debug(pipeline == VK_NULL_HANDLE);
- pipeline = newPipeline;
- if(newPipeline != VK_NULL_HANDLE)
+ if (m_pipeline == newPipeline)
+     return;
+ cemu_assert_debug(m_pipeline == VK_NULL_HANDLE); // replacing an already assigned pipeline is not intended
+ if(m_pipeline == VK_NULL_HANDLE && newPipeline != VK_NULL_HANDLE)
      performanceMonitor.vk.numGraphicPipelines.increment();
+ else if(m_pipeline != VK_NULL_HANDLE && newPipeline == VK_NULL_HANDLE)
+     performanceMonitor.vk.numGraphicPipelines.decrement();
+ m_pipeline = newPipeline;
  }

  VKRObjectPipeline::~VKRObjectPipeline()
  {
  auto vkr = VulkanRenderer::GetInstance();
- if (pipeline != VK_NULL_HANDLE)
+ if (m_pipeline != VK_NULL_HANDLE)
  {
-     vkDestroyPipeline(vkr->GetLogicalDevice(), pipeline, nullptr);
+     vkDestroyPipeline(vkr->GetLogicalDevice(), m_pipeline, nullptr);
      performanceMonitor.vk.numGraphicPipelines.decrement();
  }
- if (vertexDSL != VK_NULL_HANDLE)
-     vkDestroyDescriptorSetLayout(vkr->GetLogicalDevice(), vertexDSL, nullptr);
- if (pixelDSL != VK_NULL_HANDLE)
-     vkDestroyDescriptorSetLayout(vkr->GetLogicalDevice(), pixelDSL, nullptr);
- if (geometryDSL != VK_NULL_HANDLE)
-     vkDestroyDescriptorSetLayout(vkr->GetLogicalDevice(), geometryDSL, nullptr);
- if (pipeline_layout != VK_NULL_HANDLE)
-     vkDestroyPipelineLayout(vkr->GetLogicalDevice(), pipeline_layout, nullptr);
+ if (m_vertexDSL != VK_NULL_HANDLE)
+     vkDestroyDescriptorSetLayout(vkr->GetLogicalDevice(), m_vertexDSL, nullptr);
+ if (m_pixelDSL != VK_NULL_HANDLE)
+     vkDestroyDescriptorSetLayout(vkr->GetLogicalDevice(), m_pixelDSL, nullptr);
+ if (m_geometryDSL != VK_NULL_HANDLE)
+     vkDestroyDescriptorSetLayout(vkr->GetLogicalDevice(), m_geometryDSL, nullptr);
+ if (m_pipelineLayout != VK_NULL_HANDLE)
+     vkDestroyPipelineLayout(vkr->GetLogicalDevice(), m_pipelineLayout, nullptr);
  }

  VKRObjectDescriptorSet::VKRObjectDescriptorSet()

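SetPipeline now keeps the performance monitor's pipeline count correct across both directions of the transition (null to valid and valid to null) instead of unconditionally incrementing. The accounting pattern in isolation — a sketch with a plain atomic counter standing in for performanceMonitor:

#include <atomic>

std::atomic<int> g_livePipelineCount{0};

struct PipelineSlot
{
    void* m_pipeline = nullptr; // stand-in for VkPipeline

    void Set(void* newPipeline)
    {
        if (m_pipeline == newPipeline)
            return; // no transition, no accounting
        if (m_pipeline == nullptr && newPipeline != nullptr)
            g_livePipelineCount.fetch_add(1); // null -> valid
        else if (m_pipeline != nullptr && newPipeline == nullptr)
            g_livePipelineCount.fetch_sub(1); // valid -> null
        m_pipeline = newPipeline;
    }
};

Counting only on transitions is what lets callers such as the surface-copy path safely call SetPipeline(nullptr) on a failed create without skewing the statistic.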
@@ -452,6 +452,8 @@ private:
  bool dynamic_rendering = false; // VK_KHR_dynamic_rendering
  bool shader_float_controls = false; // VK_KHR_shader_float_controls
  bool present_wait = false; // VK_KHR_present_wait
+ bool depth_clip_enable = false; // VK_EXT_depth_clip_enable
+ bool pipeline_robustness = false; // VK_EXT_pipeline_robustness
  }deviceExtensions;

  struct

@@ -298,7 +298,8 @@ PipelineInfo* VulkanRenderer::draw_createGraphicsPipeline(uint32 indexCount)
  // init pipeline compiler
  PipelineCompiler* pipelineCompiler = new PipelineCompiler();

- pipelineCompiler->InitFromCurrentGPUState(pipelineInfo, LatteGPUState.contextNew, vkFBO->GetRenderPassObj());
+ bool requiresRobustBufferAccess = PipelineCompiler::CalcRobustBufferAccessRequirement(vertexShader, pixelShader, geometryShader);
+ pipelineCompiler->InitFromCurrentGPUState(pipelineInfo, LatteGPUState.contextNew, vkFBO->GetRenderPassObj(), requiresRobustBufferAccess);
  pipelineCompiler->TrackAsCached(vsBaseHash, pipelineHash);

  // use heuristics based on parameter patterns to determine if the current drawcall is essential (non-skippable)
@@ -603,7 +604,7 @@ VkDescriptorSetInfo* VulkanRenderer::draw_getOrCreateDescriptorSet(PipelineInfo*
  const auto it = pipeline_info->vertex_ds_cache.find(stateHash);
  if (it != pipeline_info->vertex_ds_cache.cend())
      return it->second;
- descriptor_set_layout = pipeline_info->m_vkrObjPipeline->vertexDSL;
+ descriptor_set_layout = pipeline_info->m_vkrObjPipeline->m_vertexDSL;
  break;
  }
  case LatteConst::ShaderType::Pixel:
@@ -611,7 +612,7 @@ VkDescriptorSetInfo* VulkanRenderer::draw_getOrCreateDescriptorSet(PipelineInfo*
  const auto it = pipeline_info->pixel_ds_cache.find(stateHash);
  if (it != pipeline_info->pixel_ds_cache.cend())
      return it->second;
- descriptor_set_layout = pipeline_info->m_vkrObjPipeline->pixelDSL;
+ descriptor_set_layout = pipeline_info->m_vkrObjPipeline->m_pixelDSL;
  break;
  }
  case LatteConst::ShaderType::Geometry:
@@ -619,7 +620,7 @@ VkDescriptorSetInfo* VulkanRenderer::draw_getOrCreateDescriptorSet(PipelineInfo*
  const auto it = pipeline_info->geometry_ds_cache.find(stateHash);
  if (it != pipeline_info->geometry_ds_cache.cend())
      return it->second;
- descriptor_set_layout = pipeline_info->m_vkrObjPipeline->geometryDSL;
+ descriptor_set_layout = pipeline_info->m_vkrObjPipeline->m_geometryDSL;
  break;
  }
  default:
@@ -1481,8 +1482,7 @@ void VulkanRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
  }

  auto vkObjPipeline = pipeline_info->m_vkrObjPipeline;
-
- if (vkObjPipeline->pipeline == VK_NULL_HANDLE)
+ if (vkObjPipeline->GetPipeline() == VK_NULL_HANDLE)
  {
      // invalid/uninitialized pipeline
      m_state.activeVertexDS = nullptr;
@@ -1509,11 +1509,11 @@ void VulkanRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32

  draw_setRenderPass();

- if (m_state.currentPipeline != vkObjPipeline->pipeline)
+ if (m_state.currentPipeline != vkObjPipeline->GetPipeline())
  {
-     vkCmdBindPipeline(m_state.currentCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, vkObjPipeline->pipeline);
+     vkCmdBindPipeline(m_state.currentCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, vkObjPipeline->GetPipeline());
      vkObjPipeline->flagForCurrentCommandBuffer();
-     m_state.currentPipeline = vkObjPipeline->pipeline;
+     m_state.currentPipeline = vkObjPipeline->GetPipeline();
      // depth bias
      if (pipeline_info->usesDepthBias)
          draw_updateDepthBias(true);
@@ -1545,7 +1545,7 @@ void VulkanRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
  dsArray[1] = pixelDS->m_vkObjDescriptorSet->descriptorSet;

  vkCmdBindDescriptorSets(m_state.currentCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
-     vkObjPipeline->pipeline_layout, 0, 2, dsArray, numDynOffsetsVS + numDynOffsetsPS,
+     vkObjPipeline->m_pipelineLayout, 0, 2, dsArray, numDynOffsetsVS + numDynOffsetsPS,
      dynamicOffsets);
  }
  else if (vertexDS)
@@ -1554,7 +1554,7 @@ void VulkanRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
  draw_prepareDynamicOffsetsForDescriptorSet(VulkanRendererConst::SHADER_STAGE_INDEX_VERTEX, dynamicOffsets, numDynOffsets,
      pipeline_info);
  vkCmdBindDescriptorSets(m_state.currentCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
-     vkObjPipeline->pipeline_layout, 0, 1, &vertexDS->m_vkObjDescriptorSet->descriptorSet, numDynOffsets,
+     vkObjPipeline->m_pipelineLayout, 0, 1, &vertexDS->m_vkObjDescriptorSet->descriptorSet, numDynOffsets,
      dynamicOffsets);
  }
  else if (pixelDS)
@@ -1563,7 +1563,7 @@ void VulkanRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
  draw_prepareDynamicOffsetsForDescriptorSet(VulkanRendererConst::SHADER_STAGE_INDEX_FRAGMENT, dynamicOffsets, numDynOffsets,
      pipeline_info);
  vkCmdBindDescriptorSets(m_state.currentCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
-     vkObjPipeline->pipeline_layout, 1, 1, &pixelDS->m_vkObjDescriptorSet->descriptorSet, numDynOffsets,
+     vkObjPipeline->m_pipelineLayout, 1, 1, &pixelDS->m_vkObjDescriptorSet->descriptorSet, numDynOffsets,
      dynamicOffsets);
  }
  if (geometryDS)
@@ -1572,7 +1572,7 @@ void VulkanRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
  draw_prepareDynamicOffsetsForDescriptorSet(VulkanRendererConst::SHADER_STAGE_INDEX_GEOMETRY, dynamicOffsets, numDynOffsets,
      pipeline_info);
  vkCmdBindDescriptorSets(m_state.currentCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
-     vkObjPipeline->pipeline_layout, 2, 1, &geometryDS->m_vkObjDescriptorSet->descriptorSet, numDynOffsets,
+     vkObjPipeline->m_pipelineLayout, 2, 1, &geometryDS->m_vkObjDescriptorSet->descriptorSet, numDynOffsets,
      dynamicOffsets);
  }

@@ -357,7 +357,7 @@ CopySurfacePipelineInfo* VulkanRenderer::copySurface_getOrCreateGraphicsPipeline
  layoutInfo.bindingCount = (uint32_t)descriptorSetLayoutBindings.size();
  layoutInfo.pBindings = descriptorSetLayoutBindings.data();

- if (vkCreateDescriptorSetLayout(m_logicalDevice, &layoutInfo, nullptr, &vkObjPipeline->pixelDSL) != VK_SUCCESS)
+ if (vkCreateDescriptorSetLayout(m_logicalDevice, &layoutInfo, nullptr, &vkObjPipeline->m_pixelDSL) != VK_SUCCESS)
      UnrecoverableError(fmt::format("Failed to create descriptor set layout for surface copy shader").c_str());

  // ##########################################################################################################################################
@@ -370,15 +370,15 @@ CopySurfacePipelineInfo* VulkanRenderer::copySurface_getOrCreateGraphicsPipeline
  VkPipelineLayoutCreateInfo pipelineLayoutInfo{};
  pipelineLayoutInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
  pipelineLayoutInfo.setLayoutCount = 1;
- pipelineLayoutInfo.pSetLayouts = &vkObjPipeline->pixelDSL;
+ pipelineLayoutInfo.pSetLayouts = &vkObjPipeline->m_pixelDSL;
  pipelineLayoutInfo.pPushConstantRanges = &pushConstantRange;
  pipelineLayoutInfo.pushConstantRangeCount = 1;

- VkResult result = vkCreatePipelineLayout(m_logicalDevice, &pipelineLayoutInfo, nullptr, &vkObjPipeline->pipeline_layout);
+ VkResult result = vkCreatePipelineLayout(m_logicalDevice, &pipelineLayoutInfo, nullptr, &vkObjPipeline->m_pipelineLayout);
  if (result != VK_SUCCESS)
  {
      cemuLog_log(LogType::Force, "Failed to create pipeline layout: {}", result);
-     vkObjPipeline->pipeline = VK_NULL_HANDLE;
+     vkObjPipeline->SetPipeline(VK_NULL_HANDLE);
      return copyPipeline;
  }

@@ -425,7 +425,7 @@ CopySurfacePipelineInfo* VulkanRenderer::copySurface_getOrCreateGraphicsPipeline
  pipelineInfo.pRasterizationState = &rasterizer;
  pipelineInfo.pMultisampleState = &multisampling;
  pipelineInfo.pColorBlendState = state.destinationTexture->isDepth?nullptr:&colorBlending;
- pipelineInfo.layout = vkObjPipeline->pipeline_layout;
+ pipelineInfo.layout = vkObjPipeline->m_pipelineLayout;
  pipelineInfo.renderPass = copyPipeline->vkObjRenderPass->m_renderPass;
  pipelineInfo.pDepthStencilState = &depthStencilState;
  pipelineInfo.subpass = 0;
@@ -434,17 +434,16 @@ CopySurfacePipelineInfo* VulkanRenderer::copySurface_getOrCreateGraphicsPipeline

  copyPipeline->vkObjPipeline = vkObjPipeline;

- result = vkCreateGraphicsPipelines(m_logicalDevice, m_pipeline_cache, 1, &pipelineInfo, nullptr, &copyPipeline->vkObjPipeline->pipeline);
+ VkPipeline pipeline = VK_NULL_HANDLE;
+ result = vkCreateGraphicsPipelines(m_logicalDevice, m_pipeline_cache, 1, &pipelineInfo, nullptr, &pipeline);
  if (result != VK_SUCCESS)
  {
+     copyPipeline->vkObjPipeline->SetPipeline(nullptr);
      cemuLog_log(LogType::Force, "Failed to create graphics pipeline for surface copy. Error {} Info:", (sint32)result);
-     cemu_assert_debug(false);
-     copyPipeline->vkObjPipeline->pipeline = VK_NULL_HANDLE;
+     cemu_assert_suspicious();
  }
- //performanceMonitor.vk.numGraphicPipelines.increment();
-
- //m_pipeline_cache_semaphore.notify();
-
+ else
+     copyPipeline->vkObjPipeline->SetPipeline(pipeline);
  return copyPipeline;
  }

@@ -522,7 +521,7 @@ VKRObjectDescriptorSet* VulkanRenderer::surfaceCopy_getOrCreateDescriptorSet(VkC
  allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
  allocInfo.descriptorPool = m_descriptorPool;
  allocInfo.descriptorSetCount = 1;
- allocInfo.pSetLayouts = &(pipelineInfo->vkObjPipeline->pixelDSL);
+ allocInfo.pSetLayouts = &pipelineInfo->vkObjPipeline->m_pixelDSL;

  if (vkAllocateDescriptorSets(m_logicalDevice, &allocInfo, &vkObjDescriptorSet->descriptorSet) != VK_SUCCESS)
  {
@@ -644,7 +643,7 @@ void VulkanRenderer::surfaceCopy_viaDrawcall(LatteTextureVk* srcTextureVk, sint3
  pushConstantData.srcTexelOffset[0] = 0;
  pushConstantData.srcTexelOffset[1] = 0;

- vkCmdPushConstants(m_state.currentCommandBuffer, copySurfacePipelineInfo->vkObjPipeline->pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(pushConstantData), &pushConstantData);
+ vkCmdPushConstants(m_state.currentCommandBuffer, copySurfacePipelineInfo->vkObjPipeline->m_pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(pushConstantData), &pushConstantData);

  // draw
  VkRenderPassBeginInfo renderPassInfo{};
@@ -680,13 +679,13 @@ void VulkanRenderer::surfaceCopy_viaDrawcall(LatteTextureVk* srcTextureVk, sint3

  vkCmdBeginRenderPass(m_state.currentCommandBuffer, &renderPassInfo, VK_SUBPASS_CONTENTS_INLINE);

- vkCmdBindPipeline(m_state.currentCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, copySurfacePipelineInfo->vkObjPipeline->pipeline);
+ vkCmdBindPipeline(m_state.currentCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, copySurfacePipelineInfo->vkObjPipeline->GetPipeline());
  copySurfacePipelineInfo->vkObjPipeline->flagForCurrentCommandBuffer();

- m_state.currentPipeline = copySurfacePipelineInfo->vkObjPipeline->pipeline;
+ m_state.currentPipeline = copySurfacePipelineInfo->vkObjPipeline->GetPipeline();

  vkCmdBindDescriptorSets(m_state.currentCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
-     copySurfacePipelineInfo->vkObjPipeline->pipeline_layout, 0, 1, &vkObjDescriptorSet->descriptorSet, 0, nullptr);
+     copySurfacePipelineInfo->vkObjPipeline->m_pipelineLayout, 0, 1, &vkObjDescriptorSet->descriptorSet, 0, nullptr);
  vkObjDescriptorSet->flagForCurrentCommandBuffer();

  vkCmdDraw(m_state.currentCommandBuffer, 6, 1, 0, 0);

@@ -500,7 +500,7 @@ namespace MMU

  // todo - instead of passing the physical address to Read/WriteMMIO we should pass an interface id and a relative address? This would allow remapping the hardware address (tho we can just unregister + register at different addresses)

- uint16 ReadMMIO_32(PAddr address)
+ uint32 ReadMMIO_32(PAddr address)
  {
      cemu_assert_debug((address & 0x3) == 0);
      auto itr = g_mmioHandlerR32->find(address);

@@ -261,7 +261,7 @@ namespace MMU

  void WriteMMIO_32(PAddr address, uint32 value);
  void WriteMMIO_16(PAddr address, uint16 value);
- uint16 ReadMMIO_32(PAddr address);
+ uint32 ReadMMIO_32(PAddr address);
  uint16 ReadMMIO_16(PAddr address);

  }

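The ReadMMIO_32 change is a plain return-type bug fix: the handler produces a full 32-bit register value, but the old uint16 signature silently truncated it at the return boundary for every caller. A minimal illustration (hypothetical names):

#include <cstdint>
#include <cassert>

uint32_t ReadRegister() { return 0xDEADBEEF; } // the handler computes a full 32-bit value

uint16_t ReadBroken() { return ReadRegister(); } // old signature: value truncated to 0xBEEF here
uint32_t ReadFixed() { return ReadRegister(); }  // new signature: full value preserved

int main()
{
    assert(ReadBroken() == 0xBEEF);    // upper 16 bits are gone
    assert(ReadFixed() == 0xDEADBEEF); // full register value survives
}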
@@ -87,7 +87,6 @@ namespace HW_SI

  HWREG::SICOMCSR SI_COMCSR_R32(PAddr addr)
  {
-     //cemuLog_logDebug(LogType::Force, "Read SICOMCSR");
      return g_si.registerState.sicomcsr;
  }

@@ -464,5 +464,34 @@ namespace iosu
      return static_cast<IOSUModule*>(&sIOSUModuleNNPDM);
  }

+ bool GameListStat::LastPlayDate::operator<(const LastPlayDate& b) const
+ {
+     const auto& a = *this;
+
+     if(a.year < b.year)
+         return true;
+     if(a.year > b.year)
+         return false;
+
+     // same year
+     if(a.month < b.month)
+         return true;
+     if(a.month > b.month)
+         return false;
+
+     // same year and month
+     return a.day < b.day;
+ }
+
+ bool GameListStat::LastPlayDate::operator==(const LastPlayDate& b) const
+ {
+     const auto& a = *this;
+     return a.year == b.year &&
+         a.month == b.month &&
+         a.day == b.day;
+ }
+ std::weak_ordering GameListStat::LastPlayDate::operator<=>(const LastPlayDate& b) const = default;

  };
  };

@@ -21,11 +21,15 @@ namespace iosu
  /* Helper for UI game list */
  struct GameListStat
  {
-     struct
+     struct LastPlayDate
      {
          uint32 year; // if 0 -> never played
          uint32 month;
          uint32 day;
+
+         bool operator<(const LastPlayDate& b) const;
+         bool operator==(const LastPlayDate& b) const;
+         std::weak_ordering operator<=>(const LastPlayDate& b) const;
      }last_played;
      uint32 numMinutesPlayed;
  };

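With year, month, day declared in most-to-least significant order, a defaulted operator<=> yields exactly the lexicographic comparison the hand-written operator< spells out; the manual version mainly documents the intent. The equivalence in miniature (sketch):

#include <compare>
#include <cassert>

struct Date
{
    unsigned year;  // declaration order defines comparison priority
    unsigned month;
    unsigned day;
    auto operator<=>(const Date&) const = default; // lexicographic: year, then month, then day
};

int main()
{
    assert(Date{2024, 5, 1} < Date{2024, 6, 1});
    assert(Date{2023, 12, 31} < Date{2024, 1, 1});
    assert(Date{2024, 5, 1} == Date{2024, 5, 1}); // defaulted <=> also brings a defaulted ==
}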
@@ -502,6 +502,7 @@ namespace iosu
  curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, task_header_callback);
  curl_easy_setopt(curl, CURLOPT_HEADERDATA, &(*it));
  curl_easy_setopt(curl, CURLOPT_TIMEOUT, 0x3C);
+ curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  if (IsNetworkServiceSSLDisabled(ActiveSettings::GetNetworkService()))
  {
      curl_easy_setopt(curl,CURLOPT_SSL_VERIFYPEER,0L);

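The added CURLOPT_HTTP_VERSION line pins the download-task connection to HTTP/1.1 instead of letting libcurl negotiate upward. Standalone usage of that option, with plain libcurl API and a placeholder URL:

#include <curl/curl.h>

int main()
{
    curl_global_init(CURL_GLOBAL_DEFAULT);
    CURL* curl = curl_easy_init();
    if (curl)
    {
        curl_easy_setopt(curl, CURLOPT_URL, "https://example.com/");
        // force HTTP/1.1; without this, libcurl may negotiate HTTP/2 where the server supports it
        curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
        curl_easy_perform(curl);
        curl_easy_cleanup(curl);
    }
    curl_global_cleanup();
    return 0;
}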
@@ -132,7 +132,7 @@ namespace iosu

  void convertMultiByteStringToBigEndianWidechar(const char* input, uint16be* output, sint32 maxOutputLength)
  {
- std::basic_string<uint16be> beStr = StringHelpers::FromUtf8(input);
+ std::vector<uint16be> beStr = StringHelpers::FromUtf8(input);
  if (beStr.size() >= maxOutputLength - 1)
      beStr.resize(maxOutputLength-1);
  for (size_t i = 0; i < beStr.size(); i++)
@@ -723,7 +723,7 @@ namespace iosu
  {
      if(numVecIn != 0 || numVecOut != 1)
          return FPResult_InvalidIPCParam;
-     std::basic_string<uint16be> myComment;
+     std::vector<uint16be> myComment;
      if(g_fpd.nexFriendSession)
      {
          if(vecOut->size != MY_COMMENT_LENGTH * sizeof(uint16be))
@@ -735,8 +735,8 @@ namespace iosu
      g_fpd.nexFriendSession->getMyComment(myNexComment);
      myComment = StringHelpers::FromUtf8(myNexComment.commentString);
  }
- myComment.insert(0, 1, '\0');
- memcpy(vecOut->basePhys.GetPtr(), myComment.c_str(), MY_COMMENT_LENGTH * sizeof(uint16be));
+ myComment.insert(myComment.begin(), '\0');
+ memcpy(vecOut->basePhys.GetPtr(), myComment.data(), MY_COMMENT_LENGTH * sizeof(uint16be));
  return FPResult_Ok;
  }

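Switching these buffers from std::basic_string<uint16be> to std::vector<uint16be> sidesteps the fact that basic_string over a non-character type formally requires a char_traits specialization and is not portable across standard libraries; a vector has no such requirement, at the cost of spelling the front-insert and raw-pointer access differently. The two idioms side by side, with uint16_t standing in for uint16be (sketch):

#include <cstdint>
#include <cstring>
#include <vector>

int main()
{
    std::vector<uint16_t> comment = {u'h', u'i'}; // was: std::basic_string<uint16be>
    comment.insert(comment.begin(), u'\0');       // was: comment.insert(0, 1, '\0')

    uint16_t out[8] = {};
    // was: comment.c_str(); a vector exposes its contiguous storage via data()
    std::memcpy(out, comment.data(), comment.size() * sizeof(uint16_t));
    return 0;
}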
@@ -1,28 +1,161 @@
  #include "Cafe/OS/common/OSCommon.h"
  #include "Cafe/OS/libs/TCL/TCL.h"

+ #include "HW/Latte/Core/LattePM4.h"
+
  namespace TCL
  {
+     SysAllocator<coreinit::OSEvent> s_updateRetirementEvent;
+     uint64 s_currentRetireMarker = 0;
+
-     enum class TCL_SUBMISSION_FLAG : uint32
+     struct TCLStatePPC // mapped into PPC space
      {
-         SURFACE_SYNC = 0x400000, // submit surface sync packet before cmd
-         TRIGGER_INTERRUPT = 0x200000, // probably
-         UKN_20000000 = 0x20000000,
+         uint64be gpuRetireMarker; // written by GPU
      };

-     int TCLSubmitToRing(uint32be* cmd, uint32 cmdLen, uint32be* controlFlags, uint64* submissionTimestamp)
+     SysAllocator<TCLStatePPC> s_tclStatePPC;
+
+     // called from GPU for timestamp EOP event
+     void TCLGPUNotifyNewRetirementTimestamp()
      {
-         // todo - figure out all the bits of *controlFlags
-         // if submissionTimestamp != nullptr then set it to the timestamp of the submission. Note: We should make sure that uint64's are written atomically by the GPU command processor
-         MEMPTR<uint32> b;
-         cemu_assert_debug(false);
+         // gpuRetireMarker is updated via event eop command
+         __OSLockScheduler();
+         coreinit::OSSignalEventAllInternal(s_updateRetirementEvent.GetPtr());
+         __OSUnlockScheduler();
+     }
+
+     int TCLTimestamp(TCLTimestampId id, uint64be* timestampOut)
+     {
+         if (id == TCLTimestampId::TIMESTAMP_LAST_BUFFER_RETIRED)
+         {
+             // this is the timestamp of the last buffer that was retired by the GPU
+             stdx::atomic_ref<uint64be> retireTimestamp(s_tclStatePPC->gpuRetireMarker);
+             *timestampOut = retireTimestamp.load();
+             return 0;
+         }
+         else
+         {
+             cemuLog_log(LogType::Force, "TCLTimestamp(): Unsupported timestamp ID {}", (uint32)id);
+             *timestampOut = 0;
+             return 0;
+         }
+     }
+
+     int TCLWaitTimestamp(TCLTimestampId id, uint64 waitTs, uint64 timeout)
+     {
+         if (id == TCLTimestampId::TIMESTAMP_LAST_BUFFER_RETIRED)
+         {
+             while ( true )
+             {
+                 stdx::atomic_ref<uint64be> retireTimestamp(s_tclStatePPC->gpuRetireMarker);
+                 uint64 currentTimestamp = retireTimestamp.load();
+                 if (currentTimestamp >= waitTs)
+                     return 0;
+                 coreinit::OSWaitEvent(s_updateRetirementEvent.GetPtr());
+             }
+         }
+         else
+         {
+             cemuLog_log(LogType::Force, "TCLWaitTimestamp(): Unsupported timestamp ID {}", (uint32)id);
+         }
          return 0;
      }

+     static constexpr uint32 TCL_RING_BUFFER_SIZE = 4096; // in U32s
+
+     std::atomic<uint32> tclRingBufferA[TCL_RING_BUFFER_SIZE];
+     std::atomic<uint32> tclRingBufferA_readIndex{0};
+     uint32 tclRingBufferA_writeIndex{0};
+
+     // GPU code calls this to grab the next command word
+     bool TCLGPUReadRBWord(uint32& cmdWord)
+     {
+         if (tclRingBufferA_readIndex == tclRingBufferA_writeIndex)
+             return false;
+         cmdWord = tclRingBufferA[tclRingBufferA_readIndex];
+         tclRingBufferA_readIndex = (tclRingBufferA_readIndex+1) % TCL_RING_BUFFER_SIZE;
+         return true;
+     }
+
+     void TCLWaitForRBSpace(uint32be numU32s)
+     {
+         while ( true )
+         {
+             uint32 distance = (tclRingBufferA_readIndex + TCL_RING_BUFFER_SIZE - tclRingBufferA_writeIndex) & (TCL_RING_BUFFER_SIZE - 1);
+             if (tclRingBufferA_writeIndex == tclRingBufferA_readIndex) // buffer completely empty
+                 distance = TCL_RING_BUFFER_SIZE;
+             if (distance >= numU32s+1) // assume distance minus one, because we are never allowed to completely wrap around
+                 break;
+             _mm_pause();
+         }
+     }
+
+     // this function assumes that TCLWaitForRBSpace was called and that there is enough space
+     void TCLWriteCmd(uint32be* cmd, uint32 cmdLen)
+     {
+         while (cmdLen > 0)
+         {
+             tclRingBufferA[tclRingBufferA_writeIndex] = *cmd;
+             tclRingBufferA_writeIndex++;
+             tclRingBufferA_writeIndex &= (TCL_RING_BUFFER_SIZE - 1);
+             cmd++;
+             cmdLen--;
+         }
+     }
+
+     #define EVENT_TYPE_TS 5
+
+     void TCLSubmitRetireMarker(bool triggerEventInterrupt)
+     {
+         s_currentRetireMarker++;
+         uint32be cmd[6];
+         cmd[0] = pm4HeaderType3(IT_EVENT_WRITE_EOP, 5);
+         cmd[1] = (4 | (EVENT_TYPE_TS << 8)); // event type (bits 8-15) and event index (bits 0-7).
+         cmd[2] = MEMPTR<void>(&s_tclStatePPC->gpuRetireMarker).GetMPTR(); // address lower 32bits + data sel bits
+         cmd[3] = 0x40000000; // select 64bit write, lower 16 bits are the upper bits of the address
+         if (triggerEventInterrupt)
+             cmd[3] |= 0x2000000; // trigger interrupt after value has been written
+         cmd[4] = (uint32)s_currentRetireMarker; // data lower 32 bits
+         cmd[5] = (uint32)(s_currentRetireMarker>>32); // data higher 32 bits
+         TCLWriteCmd(cmd, 6);
+     }
+
+     int TCLSubmitToRing(uint32be* cmd, uint32 cmdLen, betype<TCLSubmissionFlag>* controlFlags, uint64be* timestampValueOut)
+     {
+         TCLSubmissionFlag flags = *controlFlags;
+         cemu_assert_debug(timestampValueOut); // handle case where this is null
+
+         // make sure there is enough space to submit all commands at once
+         uint32 totalCommandLength = cmdLen;
+         totalCommandLength += 6; // space needed for TCLSubmitRetireMarker
+
+         TCLWaitForRBSpace(totalCommandLength);
+
+         // submit command buffer
+         TCLWriteCmd(cmd, cmdLen);
+
+         // create new marker timestamp and tell the GPU to write it to our variable after it's done processing the command
+         if ((HAS_FLAG(flags, TCLSubmissionFlag::USE_RETIRED_MARKER)))
+         {
+             TCLSubmitRetireMarker(!HAS_FLAG(flags, TCLSubmissionFlag::NO_MARKER_INTERRUPT));
+             *timestampValueOut = s_currentRetireMarker; // incremented before each submit
+         }
+         else
+         {
+             cemu_assert_unimplemented();
+         }
+         return 0;
+     }
+
      void Initialize()
      {
          cafeExportRegister("TCL", TCLSubmitToRing, LogType::Placeholder);
+         cafeExportRegister("TCL", TCLTimestamp, LogType::Placeholder);
+         cafeExportRegister("TCL", TCLWaitTimestamp, LogType::Placeholder);
+
+         s_currentRetireMarker = 0;
+         s_tclStatePPC->gpuRetireMarker = 0;
+         coreinit::OSInitEvent(s_updateRetirementEvent.GetPtr(), coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_AUTO);
      }
  }

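TCLWaitForRBSpace's distance computation is the classic single-producer/single-consumer ring-buffer rule: one slot must always stay empty, otherwise read == write would be ambiguous between "full" and "empty" — hence the numU32s + 1 comparison. The same invariant in a minimal SPSC ring (sketch; power-of-two capacity assumed, as with TCL_RING_BUFFER_SIZE, and no cross-thread synchronization shown):

#include <cstdint>

constexpr uint32_t RING_SIZE = 4096; // must be a power of two for the index mask to work

uint32_t g_ring[RING_SIZE];
uint32_t g_read = 0;  // consumer-owned
uint32_t g_write = 0; // producer-owned

// Free slots, keeping one slot unused so full and empty are distinguishable.
uint32_t FreeSpace()
{
    uint32_t used = (g_write - g_read) & (RING_SIZE - 1);
    return (RING_SIZE - 1) - used;
}

bool TryPush(uint32_t word)
{
    if (FreeSpace() == 0)
        return false; // writing here would make read == write look empty again
    g_ring[g_write] = word;
    g_write = (g_write + 1) & (RING_SIZE - 1);
    return true;
}

bool TryPop(uint32_t& word)
{
    if (g_read == g_write)
        return false; // empty
    word = g_ring[g_read];
    g_read = (g_read + 1) & (RING_SIZE - 1);
    return true;
}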
@@ -1,4 +1,25 @@
  namespace TCL
  {
+     enum class TCLTimestampId
+     {
+         TIMESTAMP_LAST_BUFFER_RETIRED = 1,
+     };
+
+     enum class TCLSubmissionFlag : uint32
+     {
+         SURFACE_SYNC = 0x400000, // submit surface sync packet before cmd
+         NO_MARKER_INTERRUPT = 0x200000,
+         USE_RETIRED_MARKER = 0x20000000, // Controls whether the timer is updated before or after (retired) the cmd. Also controls which timestamp is returned for the submission. Before and after use separate counters
+     };
+
+     int TCLTimestamp(TCLTimestampId id, uint64be* timestampOut);
+     int TCLWaitTimestamp(TCLTimestampId id, uint64 waitTs, uint64 timeout);
+     int TCLSubmitToRing(uint32be* cmd, uint32 cmdLen, betype<TCLSubmissionFlag>* controlFlags, uint64be* timestampValueOut);
+
+     // called from Latte code
+     bool TCLGPUReadRBWord(uint32& cmdWord);
+     void TCLGPUNotifyNewRetirementTimestamp();
+
      void Initialize();
  }
+ ENABLE_BITMASK_OPERATORS(TCL::TCLSubmissionFlag);

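HAS_FLAG and the bitwise operations on TCLSubmissionFlag rely on the ENABLE_BITMASK_OPERATORS machinery; its definition is not part of this diff, but such macros conventionally generate operator overloads along these lines (sketch with a generic enum):

#include <type_traits>

enum class Flags : unsigned { A = 1, B = 2, C = 4 };

// What an ENABLE_BITMASK_OPERATORS-style macro typically expands to for one enum:
constexpr Flags operator|(Flags l, Flags r)
{
    using U = std::underlying_type_t<Flags>;
    return static_cast<Flags>(static_cast<U>(l) | static_cast<U>(r));
}
constexpr Flags operator&(Flags l, Flags r)
{
    using U = std::underlying_type_t<Flags>;
    return static_cast<Flags>(static_cast<U>(l) & static_cast<U>(r));
}

// A HAS_FLAG-style helper then reduces to a masked comparison:
constexpr bool HasFlag(Flags value, Flags flag)
{
    return (value & flag) == flag;
}

static_assert(HasFlag(Flags::A | Flags::C, Flags::C));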
@@ -742,7 +742,8 @@ namespace coreinit
  }

  __FSCmdSubmitResult(cmd, fsStatus);
- __FSUpdateQueue(&cmd->fsClientBody->fsCmdQueue);
+ // don't read from cmd after this point, since the game could already have modified it
+ __FSUpdateQueue(&client->fsCmdQueue);
  osLib_returnFromFunction(hCPU, 0);
  }

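This FS change is an ordering fix: submitting the result hands the command block back to the game, so dereferencing cmd afterwards (as the old cmd->fsClientBody access did) races with guest code reusing that memory. The general shape of the fix, with hypothetical stand-ins for the real FS types (sketch):

struct Queue;                    // stand-ins for the real FS types
struct Client { Queue* queue; };
struct Command { Client* client; };

void SubmitResult(Command* cmd); // hypothetical: after this returns, cmd may be reused by the guest
void UpdateQueue(Queue* queue);  // hypothetical

void FinishCommand(Command* cmd)
{
    Client* client = cmd->client; // capture everything needed *before* publishing
    SubmitResult(cmd);            // ownership effectively returns to the game here
    UpdateQueue(client->queue);   // no access to cmd past the publish point
}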
@@ -96,7 +96,6 @@ namespace coreinit
  {
  ppcDefineParamU32(screenIndex, 0);
  cemu_assert(screenIndex < 2);
- cemuLog_logDebug(LogType::Force, "OSScreenFlipBuffersEx {}", screenIndex);
  LatteGPUState.osScreen.screen[screenIndex].flipRequestCount++;
  _updateCurrentDrawScreen(screenIndex);
  osLib_returnFromFunction(hCPU, 0);

@ -25,7 +25,11 @@ void nnNfp_update();
|
|||
|
||||
namespace coreinit
|
||||
{
|
||||
#ifdef __arm64__
|
||||
void __OSFiberThreadEntry(uint32, uint32);
|
||||
#else
|
||||
void __OSFiberThreadEntry(void* thread);
|
||||
#endif
|
||||
void __OSAddReadyThreadToRunQueue(OSThread_t* thread);
|
||||
void __OSRemoveThreadFromRunQueues(OSThread_t* thread);
|
||||
};
|
||||
|
@@ -49,7 +53,7 @@ namespace coreinit

  struct OSHostThread
  {
-     OSHostThread(OSThread_t* thread) : m_thread(thread), m_fiber(__OSFiberThreadEntry, this, this)
+     OSHostThread(OSThread_t* thread) : m_thread(thread), m_fiber((void(*)(void*))__OSFiberThreadEntry, this, this)
      {
      }

@@ -713,7 +717,10 @@ namespace coreinit
  thread->id = 0x8000;

  if (!thread->deallocatorFunc.IsNull())
+ {
      __OSQueueThreadDeallocation(thread);
+     PPCCore_switchToSchedulerWithLock(); // make sure the deallocation function runs before we return
+ }

  __OSUnlockScheduler();

@@ -1304,8 +1311,14 @@ namespace coreinit
      __OSThreadStartTimeslice(hostThread->m_thread, &hostThread->ppcInstance);
  }

+ #ifdef __arm64__
+ void __OSFiberThreadEntry(uint32 _high, uint32 _low)
+ {
+     uint64 _thread = (uint64) _high << 32 | _low;
+ #else
  void __OSFiberThreadEntry(void* _thread)
  {
+ #endif
  OSHostThread* hostThread = (OSHostThread*)_thread;

  #if defined(ARCH_X86_64)
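The __arm64__ variant of __OSFiberThreadEntry reflects a platform quirk: on targets where the fiber entry point only receives 32-bit integer arguments reliably (the makecontext-style calling convention), a 64-bit pointer has to be split into high and low halves at fiber creation and reassembled at entry. The pack/unpack in isolation (sketch):

#include <cstdint>
#include <cassert>

// Split a pointer into two 32-bit words for an entry point taking (uint32, uint32).
void PackPointer(void* p, uint32_t& high, uint32_t& low)
{
    uint64_t v = reinterpret_cast<uintptr_t>(p);
    high = static_cast<uint32_t>(v >> 32);
    low = static_cast<uint32_t>(v & 0xFFFFFFFFull);
}

void* UnpackPointer(uint32_t high, uint32_t low)
{
    uint64_t v = (static_cast<uint64_t>(high) << 32) | low;
    return reinterpret_cast<void*>(static_cast<uintptr_t>(v));
}

int main()
{
    int x = 0;
    uint32_t hi, lo;
    PackPointer(&x, hi, lo);
    assert(UnpackPointer(hi, lo) == &x);
}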
@@ -1515,7 +1528,7 @@ namespace coreinit
  }

  // queue thread deallocation to run after current thread finishes
- // the termination threads run at a higher priority on the same threads
+ // the termination threads run at a higher priority on the same core
  void __OSQueueThreadDeallocation(OSThread_t* thread)
  {
      uint32 coreIndex = OSGetCoreId();

Some files were not shown because too many files have changed in this diff